Trouble displaying links using python selenium - python

I'm trying to display a list of links that I parsed, it displays 1, when deriving a variable linkvideo2 I get the result in the form of the first link to the video. Maybe you can't use CSS_SELECTOR with a loop?)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
import time
name = 'hello world'
profile_path = r"C:/Users/Федорчик/Desktop"
options=Options()
options.set_preference('profile', profile_path)
driver = webdriver.Firefox(options=options)
#driver = webdriver.Firefox(r"C:/Users/Федорчик/Desktop")
driver.get('https://www.youtube.com')
id_serth = driver.find_element(By.NAME, "search_query")
id_serth.send_keys(name)
button_serth = driver.find_element(By.ID, "search-icon-legacy")
time.sleep(4)
button_serth.click()
time.sleep(4)
button_filtr = driver.find_element(By.CLASS_NAME ,"ytd-toggle-button-renderer")
button_filtr.click()
time.sleep(4)
button_filtrtode=driver.find_element(By.CLASS_NAME, "ytd-search-filter-renderer")
button_filtrtode.click()
time.sleep(4)
urltek = driver.current_url
linkvideo2 = driver.find_elements(By.CSS_SELECTOR, 'ytd-video-renderer.style-scope:nth-child(1) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > div:nth-child(1) > h3:nth-child(1) > a:nth-child(2)')
links=[]
for i in linkvideo2:
links.append(i.get_attribute('href'))
print(len(links))
print (urltek)
Answer:
1
https://www.youtube.com/results?search_query=hello+world&sp=EgIIAQ%253D%253D
I will be very grateful for your help

better find by ID
linkvideo2 = driver.find_elements(By.ID, 'video-title')
links=[]
for i in linkvideo2:
links.append(i.get_attribute('href'))
print(len(links))
print(urltek)
print(links)
output of "links" will be something like this:
[None, None, None, None, None, 'https://www.youtube.com/shorts/uXMjlXBW_fg', 'https://www.youtube.com/watch?v=s0a7AWgo6CY', 'https://www.youtube.com/watch?v=SEgOG1fZwoM', 'https://www.youtube.com/watch?v=XXFdprzIxvc', 'https://www.youtube.com/watch?v=82YAs_viROM', 'https://www.youtube.com/watch?v=IgRObNaGhAg', 'https://www.youtube.com/watch?v=sFLPNXJU4KY', 'https://www.youtube.com/watch?v=64Wo6zQDgQ0', 'https://www.youtube.com/watch?v=SpvI9648RrU', 'https://www.youtube.com/watch?v=wUw-rh97fso', 'https://www.youtube.com/watch?v=C7XDmQsf1-A', 'https://www.youtube.com/watch?v=3md7eviZC_Y', 'https://www.youtube.com/watch?v=wklkXDhb6MQ', 'https://www.youtube.com/watch?v=82wxikm2CwI', 'https://www.youtube.com/watch?v=D1DVUYcE43A', 'https://www.youtube.com/watch?v=MvFgCJqpxWI', 'https://www.youtube.com/shorts/8cfy5DjIS10', 'https://www.youtube.com/watch?v=5RV3unTmyTA', 'https://www.youtube.com/watch?v=Sjgee-HCCxs', 'https://www.youtube.com/watch?v=vUGQsCQNUgs']

Related

How do I click and press keys using Selenium?

I'm trying to find a Google Place ID of Statue of Liberty National Monument from a Google's developer website: https://developers.google.com/maps/documentation/javascript/examples/places-placeid-finder#maps_places_placeid_finder-css
I want to click on the search bar where it says "Enter a location", type "Statue of Liberty National Monument", press down arrow key, and press enter key to get this result.
However, my code doesn't even click the search bar. Please help me.
import selenium
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
PATH = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
Google_IDS = ['https://developers.google.com/maps/documentation/javascript/examples/places-placeid-finder#maps_places_placeid_finder-css']
for Google_ID in Google_IDS:
driver.get(Google_ID)
driver.implicitly_wait(10)
Google_ID = driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div[4]/input')
Google_ID.click()
Google_ID.send_keys('Statue of Liberty National Monument')
Google_ID.send_keys('keys.DOWN')
Google_ID.send_keys('keys.ENTER')
Updated Code:
import selenium
import pandas as pd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
PATH = "C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
wait = WebDriverWait(driver, 10)
Google_IDS = ['https://developers.google.com/maps/documentation/javascript/examples/places-placeid-finder#maps_places_placeid_finder-css']
for Google_ID in Google_IDS:
driver.get(Google_ID)
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"div.devsite-article-body.clearfix > div > devsite-iframe > iframe")))
Google_ID = driver.find_element(By.CSS_SELECTOR, '#pac-input')
Google_ID.send_keys('Statue of Liberty National Monument')
dropdown = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.pac-container.pac-logo > div')))
dropdown.click()
ID_Info = driver.find_element(By.ID, "place-id")
print("Google ID:", ID_Info.text)
Try Use following code
PATH = "C:\Program Files(x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
Google_IDS = ['https://developers.google.com/maps/documentation/javascript/examples/places-placeid-finder#maps_places_placeid_finder-css']
for Google_ID in Google_IDS:
driver.get(Google_ID)
driver.implicitly_wait(10)
Google_ID = driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div[4]/input')
Google_ID.click()
Google_ID.send_keys('Statue of Liberty National Monument')
Google_ID.send_keys(Keys.DOWN)
Google_ID.send_keys(Keys.ENTER)
For more :https://pythonbasics.org/selenium-keyboard/
There is another solution with using Exmplity wait and also you don't need to press down key may just Enter key
PATH = "C:\Program Files(x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
Google_IDS = ['https://developers.google.com/maps/documentation/javascript/examples/places-placeid-finder#maps_places_placeid_finder-css']
element = WebDriverWait(driver, 10).until(
for Google_ID in Google_IDS:
driver.get(Google_ID)
EC.presence_of_element_located((By.CSS_SELECTOR,"div.devsite-article-body.clearfix > div > devsite-iframe > iframe"))
Google_ID = driver.find_element(By.CSS_SELECTOR,"div.devsite-article-body.clearfix > div > devsite-iframe > iframe")
Google_ID.click()
Google_ID.send_keys('Statue of Liberty National Monument')
Google_ID.send_keys(Keys.ENTER)
wait = WebDriverWait(driver, 10)
Google_IDS = ['https://developers.google.com/maps/documentation/javascript/examples/places-placeid-finder#maps_places_placeid_finder-css']
for Google_ID in Google_IDS:
driver.get(Google_ID)
wait.until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"div.devsite-article-body.clearfix > div > devsite-iframe > iframe")))
Google_ID = driver.find_element(By.CSS_SELECTOR, '#pac-input')
Google_ID.send_keys('Statue of Liberty National Monument')
dropdown = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.pac-container.pac-logo > div')))
dropdown.click()
Your element is in an iframe and the way to send non strings such as keys are as follows. Also don't use a generic xpath as those are brittle and generally will break instead use a more inclusive selector.
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

Reading weblink from dataframe throws "stale element reference: element is not attached to the page document" error

I got a dataframe that contains links to google reviews of two restaurants. I wanted to load all reviews of two restaurants (one by one) into the browser and then save them into a new data frame. I wrote a script that reads and load all reviews into the browser as follow:
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
link_df = Link
0 https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]
1 https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]
i = 0
driver = webdriver.Chrome()
for index, i in link_df.iterrows():
base_url = i['Link'] #link_df['Link'][i]
driver.get(base_url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()
print('Restaurant number is ',index)
title = driver.find_element_by_xpath("//div[#class='P5Bobd']").text
address = driver.find_element_by_xpath("//div[#class='T6pBCe']").text
overall_rating = driver.find_element_by_xpath("//div[#class='review-score-container']//span[#class='Aq14fc']").text
total_reviews_text =driver.find_element_by_xpath("//div[#class='review-score-container']//div//div//span//span[#class='z5jxId']").text
num_reviews = int (total_reviews_text.split()[0])
all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
time.sleep(2)
total_reviews = len(all_reviews)
while total_reviews < num_reviews:
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
time.sleep(5)
all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
print(total_reviews)
total_reviews +=1
reviews_info = driver.find_elements_by_xpath("//div[#class='jxjCjc']")
review_information = pd.DataFrame(columns=["Restaurant title","Restaurant rating","Total reviews","Reviewer Name","Rating", "Review"])
name= ''
rating = ''
text = ''
for index,review_info in enumerate(reviews_info):
name = review_info.find_element_by_xpath("./div/div/a").text
rating = review_info.find_element_by_xpath(".//div[#class='PuaHbe']//g-review-stars//span").get_attribute('aria-label')
text = review_info.find_element_by_xpath(".//div[#class='Jtu6Td']//span").text
review_information.at[len(review_information)] = [title,overall_rating,num_reviews,name,rating,text]
filename = 'Google_reviews' + ' ' +pd.to_datetime("now").strftime("%Y_%m_%d")+'.csv'
files_present = glob.glob(filename)
if files_present:
review_information.to_csv(filename,index=False,mode='a',header=False)
else:
review_information.to_csv(filename,index=False)
driver.get('https:ww.google.com')
time.sleep(3)
The problem is that script throws an error when it reaches the following line.
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
It throws following error:
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=95.0.4638.69)
When I tried the same program without storing google links in dataframe (i.e. no for loop and instead of base_url = i['Link'], I wrote base_url = google review link) it works fine.
I am not sure where I am making the mistake. Any suggestion or help to fix the issue would be highly appreciated?
EDIT
you put the creation of driver outside the for loop
you cant launch the new url with gps data when the first popup is always in front, if you launch it, it stays in backdoor, the easier way is to launch a new url without gps data -> https:ww.google.com and wait 3 dec before to follow your loop:
your count is not good, i have changed your selector and change the total and set some lines in comment
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.firefox.options import Options
import time
link_df = ["https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]",
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]"
]
i = 0
binary = r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe'
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = True
options = Options()
options.binary = binary
driver = webdriver.Firefox(options=options, capabilities=cap, executable_path="E:\\Téléchargement\\geckodriver.exe")
# i have to launch one time to accept the cookies manually
#by setting a breakpoint after, but you dont have that i think
#driver.get(link_df[0])
print ("Headless Firefox Initialized")
print(link_df)
for url in link_df:
base_url = url # i['Link'] # link_df['Link'][i]
print(base_url)
driver.get(base_url)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Avis les plus récents']]"))).click()
title = driver.find_element_by_xpath("//div[#class='P5Bobd']").text
address = driver.find_element_by_xpath("//div[#class='T6pBCe']").text
overall_rating = driver.find_element_by_xpath("//div[#class='review-score-container']//span[#class='Aq14fc']").text
total_reviews_text = driver.find_element_by_xpath(
"//div[#class='review-score-container']//div//div//span//span[#class='z5jxId']").text
num_reviews = int(total_reviews_text.split()[0])
all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
# time.sleep(2)
total_reviews = 0
while total_reviews < num_reviews:
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
all_reviews = WebDriverWait(driver, 5).until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
total_reviews = len(all_reviews)
print(total_reviews, len(all_reviews))
driver.get('https:ww.google.com') # or driver.close() if no bugs
time.sleep(3)
driver.close()
driver.quit()
it seems the solution for chrome needs some fixes:
org.openqa.selenium.StaleElementReferenceException: stale element reference: element is not attached to the page document
The literal meaning is about , The referenced element is out of date , No longer attached to the current page . Usually , This is because the page has been refreshed or skipped , The solution is , Reuse findElement or findElements Method to locate the element .
so its seems for chrome there is a problem of refreshing, so i suggest to load the number of record before to scroll, to have a fresh copy of DOM items, and i have to add a wait 1sec at the end of while loop
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
#from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
import time
link_df = [
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]",
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]"
]
i = 0
binaryfirefox = r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe'
binarychrome = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
options = Options()
#cap = DesiredCapabilities().CHROME
#cap["marionette"] = True
#cap = DesiredCapabilities().FIREFOX
#options.binary = binaryfirefox
#driver = webdriver.Firefox(options=options, capabilities=cap, executable_path="E:\\Téléchargement\\geckodriver.exe")
options.binary_location = binarychrome
driver = webdriver.Chrome(options=options, executable_path="E:\\Téléchargement\\chromedriver.exe" )
# same reason tha Firefox i have to load one time
# an url to accept manually the cookies
#driver.get(link_df[0])
print(link_df)
for url in link_df:
base_url = url # i['Link'] # link_df['Link'][i]
print(base_url)
driver.get(base_url)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Newest']]"))).click()
title = driver.find_element_by_xpath("//div[#class='P5Bobd']").text
address = driver.find_element_by_xpath("//div[#class='T6pBCe']").text
overall_rating = driver.find_element_by_xpath("//div[#class='review-score-container']//span[#class='Aq14fc']").text
total_reviews_text = driver.find_element_by_xpath(
"//div[#class='review-score-container']//div//div//span//span[#class='z5jxId']").text
num_reviews = int(total_reviews_text.split()[0])
all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
# time.sleep(2)
total_reviews = 0
while total_reviews < num_reviews:
#reload to avoid exception, or trap scroll with try/except but more expznsive
all_reviews = WebDriverWait(driver, 20).until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
total_reviews = len(all_reviews)
print(total_reviews, len(all_reviews))
time.sleep(1)
driver.get('https:ww.google.com') # or driver.close() if no bugs
time.sleep(3)
driver.close()
driver.quit()

Python Selenium: extraction of rating given by individual reviewer

I am trying to extract google reviews of a resturant using Python Selenium. I tried to extract the reviews posted by each reviewers. Here is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
driver = webdriver.Chrome('')
base_url = 'https://www.google.com/search?tbs=lf:1,lf_ui:9&tbm=lcl&sxsrf=AOaemvJFjYToqQmQGGnZUovsXC1CObNK1g:1633336974491&q=10+famous+restaurants+in+Dunedin&rflfq=1&num=10&sa=X&ved=2ahUKEwiTsqaxrrDzAhXe4zgGHZPODcoQjGp6BAgKEGo&biw=1280&bih=557&dpr=2#lrd=0xa82eac0dc8bdbb4b:0x4fc9070ad0f2ac70,1,,,&rlfi=hd:;si:5749134142351780976,l,CiAxMCBmYW1vdXMgcmVzdGF1cmFudHMgaW4gRHVuZWRpbiJDUjEvZ2VvL3R5cGUvZXN0YWJsaXNobWVudF9wb2kvcG9wdWxhcl93aXRoX3RvdXJpc3Rz2gENCgcI5Q8QChgFEgIIFkiDlJ7y7YCAgAhaMhAAEAEQAhgCGAQiIDEwIGZhbW91cyByZXN0YXVyYW50cyBpbiBkdW5lZGluKgQIAxACkgESaXRhbGlhbl9yZXN0YXVyYW50mgEkQ2hkRFNVaE5NRzluUzBWSlEwRm5TVU56ZW5WaFVsOUJSUkFCqgEMEAEqCCIEZm9vZCgA,y,2qOYUvKQ1C8;mv:[[-45.8349553,170.6616387],[-45.9156414,170.4803685]]'
driver.get(base_url)
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.XPATH,"//div[./span[text()='Newest']]"))).click()
total_reviews_text =driver.find_element_by_xpath("//div[#class='review-score-container']//div//div//span//span[#class='z5jxId']").text
num_reviews = int (total_reviews_text.split()[0])
all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
time.sleep(2)
total_reviews = len(all_reviews)
while total_reviews < num_reviews:
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
#all_reviews = driver.find_elements_by_css_selector('div.gws-localreviews__google-review')
time.sleep(5)
all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
print(total_reviews)
total_reviews +=5
review_info = driver.find_elements_by_xpath("//div[#class='PuaHbe']")
for person in person_infos:
rating = person.find_element_by_xpath("./span").get_attribute('aria-label')
print(rating)
However, the above code produces/print 'none'. I am not sure where I made the mistake. Any help to fix the issue would be appreciated.
You are using a wrong XPath locator.
Instead of
rating = person.find_element_by_xpath("./span").get_attribute('aria-label')
Try using
rating = person.find_element_by_xpath("./g-review-stars/span").get_attribute('aria-label')

Python Selenium. Parallel if loop

import csv
import time
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from csv import reader
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
chrome_options = Options()
scroll = 5
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
header_added = False
header_added1 = False
url = "url"
driver = webdriver.Chrome(executable_path='C:/chromedriver.exe', options=chrome_options)
driver.maximize_window()
driver.get(url)
time.sleep(3)
search_city = input("Enter the city :")
res_n = input("Enter the Restaurant's name :")
search = driver.find_element_by_xpath('//input[#name="location"]').send_keys(search_city)
time.sleep(2)
driver.find_element_by_xpath('//*[#id="root"]/div[1]/div[1]/div/div[1]/div[1]/div/div[2]/div/div[3]/div[1]/span[2]').click()
time.sleep(3)
driver.find_element_by_xpath('/html/body/div[1]/div[1]/header/div/div/ul/li[5]/div/a/span[1]').click()
time.sleep(1)
search_res = driver.find_element_by_class_name('_2BJMh').send_keys(res_n.lower())
time.sleep(5)
driver.find_element_by_class_name('_2BJMh').send_keys(Keys.RETURN)
time.sleep(5)
try:
driver.find_element_by_class_name('_3FR5S').click()
time.sleep(5)
except:
print("restaurant not open")
driver.quit()
html = driver.find_element_by_tag_name('html')
def get_items():
global header_added
global item_dvs
cats = driver.find_elements_by_class_name('D_TFT')
cats[1].click()
time.sleep(3)
item_dvs = driver.find_elements_by_class_name('_2wg_t')
for div in item_dvs:
name = div.find_element_by_class_name('styles_itemNameText__3bcKX')
print(name.text)
price = div.find_element_by_class_name('rupee')
print(price.text)
if div.find_elements_by_class_name('styles_itemDesc__MTsVd'):
desc = div.find_element_by_class_name('styles_itemDesc__MTsVd').text
else:
desc = None
if div.find_element_by_css_selector('div._1C1Fl._23qjy'):
element = div.find_element_by_css_selector('div._1C1Fl._23qjy')
print("found")
driver.execute_script("arguments[0].scrollIntoView();", element)
add = div.find_element_by_css_selector('._1RPOp')
driver.execute_script("arguments[0].click();", add)
time.sleep(1)
add_ons = driver.find_element_by_class_name('_3UzO2').text
print(add_ons)
driver.find_element_by_css_selector('#modal-placeholder > div:nth-child(3) > div > div._1Kr-y._3EeZR > div > div._1EZLh > div > button').click()
else:
add_ons = None
dict1 = {'Item Name': name.text, "Price": price.text, "Add Ons :": add_ons, "Description": desc}
with open(f'{search_city}_{res_n}.csv', 'a+', encoding='utf-8-sig') as f:
w = csv.DictWriter(f, dict1.keys())
if not header_added:
w.writeheader()
header_added = True
w.writerow(dict1)
get_items()
The is_cust loop keeps running over and over again opening the same element, while the rest of the code moves on to the next divs. What is wrong here?
xPath are bidirectional and is probably the cause here.
Try this code using cssSelector:
for div in item_dvs:
#Do Something
try:
is_cust = div.find_element_by_css_selector('._1C1Fl._23qjy')
print("found")
except NoSuchElementException:
continue
driver.execute_script("arguments[0].scrollIntoView();", is_cust)
add = div.find_element_by_css_selector('._1RPOp')
driver.execute_script("arguments[0].click();", add)
time.sleep(1)
# Not sure why for this one you had driver instead of div. Suspect div should be
add_ons = div.find_element_by_class_name('_26cJ9').text
div.find_element_by_css_selector('#modal-placeholder > div:nth-child(3) > div > div._1Kr-y._3EeZR > div > div._1EZLh > div > button').click()
UPDATE
From your updated code, you are using lot of hardcoded sleep. I will suggest to use the WebDriverWait with expected_conditions.
More info here: Wait from Selenium
Imports needed:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
Code to be added post driver creation:
wait_time = 5
wait = WebDriverWait(driver, wait_time)
Instead of using sleep like this:
time.sleep(5)
driver.find_element_by_class_name('_2BJMh').send_keys(Keys.RETURN)
time.sleep(5)
Use:
wait.until(EC.presence_of_element_located((By.CLASS_NAME, '_2BJMh'))).send_keys(res_n.lower())
Don't gather the element twice.. use find_elements_by* then validate the length:
descs = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'styles_itemDesc__MTsVd')))
if len(descs) > 0:
desc = descs[0].text
else:
desc = None

Extracting Reviews of one particular app from Google Play Store using Selenium Python

from time import sleep
from webbrowser import Chrome
import selenium
from bs4 import BeautifulSoup as bsoup
import pandas as pd
from selenium import webdriver
class FindByXpathCss():
def test(self):
baseUrl = "https://play.google.com/store/apps/details?
id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
driver = webdriver.Chrome("F:\\Chrome-webdriver\\chromedriver")
driver.maximize_window()
driver.get(baseUrl)
Here I need to click on one button (Full review) to view the full review text.
fullReviewbtn = driver.find_element_by_css_selector('#fcxH9b > div.WpDbMd > c-wiz > div >
div.ZfcPIb > div > div.JNury.Ekdcne > div > div > div.W4P4ne > div:nth-child(2) > div >
div:nth-child(2) > div > div.d15Mdf.bAhLNe > div.UD7Dzf > span:nth-child(1) > div >
button').click()
sleep(1)
Here we are reading that full review text using an xpath, but I wish to read all other reviews of
the app, around 1200 reviews for this app alone. I wish to know how can i iterate it using for
loop here.
elementByXpath = driver.find_element_by_xpath('//*
[#id="fcxH9b"]/div[4]/c-wiz/div/div[2]/div/div[1]/div/div/div[1]/div[2]/div/div[2]/div/div[2]/div[2]').text
if elementByXpath is not None:
print("We found an element using Xpath")
#Review = elementByXpath.get_attribute("Review")
print(elementByXpath)
driver.close()
ff = FindByXpathCss()
ff.test()
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
class FindByXpathCss():
driver = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
driver.maximize_window()
baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
driver.get(baseUrl)
scrolls = 15
while True:
scrolls -= 1
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(3)
if scrolls < 0:
break
elemtn = WebDriverWait(driver, 30).until(
EC.element_to_be_clickable((By.XPATH, "//span[contains(#class,'RveJvd snByac')]")))
elemtn.click()
scrolls = 5
while True:
scrolls -= 1
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(3)
if scrolls < 0:
break
elemtn = WebDriverWait(driver, 30).until(
EC.element_to_be_clickable((By.XPATH, "//span[contains(#class,'RveJvd snByac')]")))
elemtn.click()
reviewText = WebDriverWait(driver, 30).until(
EC.presence_of_all_elements_located((By.XPATH, "//*[#class='UD7Dzf']")))
# reviewText = driver.find_elements_by_xpath("//*[#class='UD7Dzf']")
for textreview in reviewText:
print textreview.text

Categories