I would like to download .csv reports for all states and all compliance periods from this web page.
In other words, the Selenium script would select a state (for example, "DC") and a reporting period (for example, "Jan 2021 - Dec 2021"), then click "Submit". The script would then export the results to a spreadsheet by clicking the image that says "CSV".
Ideally, the script would do this for all states and all reporting periods, so at the end my downloads folder would be full of spreadsheets.
I cannot, for the life of me, figure out how to get this to work!
This is what I have so far. There are no loops yet, even though I think there should be.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import chromedriver_autoinstaller
import time
import glob
import os
chromedriver_autoinstaller.install()
chromeOptions = webdriver.ChromeOptions()
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
url = "https://gats.pjm-eis.com/GATS2/PublicReports/RPSRetiredCertificatesReportingYear"
driver.get(url)
driver.find_element(By.CSS_SELECTOR, "table:nth-child(4)").click()
driver.find_element(By.ID, "SelectedState0_B-1").click()           # open the State dropdown
driver.find_element(By.ID, "SelectedState0_DDD_L_LBI5T0").click()  # pick one state from the list
driver.find_element(By.ID, "ReportingYear0_B-1").click()           # open the Reporting Year dropdown
driver.find_element(By.ID, "ReportingYear0_DDD_L_LBI0T0").click()  # pick one reporting period
driver.find_element(By.CSS_SELECTOR, ".dx-vam:nth-child(2)").click()
driver.find_element(By.ID, "CSV0Img").click()                      # export the results as CSV
Thank you very much for your help! I truly appreciate it.
Here is the Solution!
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import glob
import os
chromeOptions = webdriver.ChromeOptions()
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
url = "https://gats.pjm-eis.com/GATS2/PublicReports/RPSRetiredCertificatesReportingYear"
state = 'DC' # Enter State Name Here
compliance_period = 'Jan 2020 - Dec 2020' # Enter Compliance Period Here
driver.get(url)
wait.until(EC.element_to_be_clickable((By.XPATH, '(//*[@class="dxEditors_edtDropDown_GATS2"])[1]'))).click() # Clicking on Dropdown Arrow Down Icon
wait.until(EC.element_to_be_clickable((By.XPATH, '//tr[@class="dxeListBoxItemRow_GATS2"]//td[text()="' + state + '"]'))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, '(//*[@class="dxEditors_edtDropDown_GATS2"])[2]'))).click() # Clicking on Dropdown Arrow Down Icon
wait.until(EC.element_to_be_clickable((By.XPATH, '//tr[@class="dxeListBoxItemRow_GATS2"]//td[text()="' + compliance_period + '"]'))).click()
driver.find_element(By.XPATH, '//*[text()="Submit"]').click()
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="CSV0Img"]'))).click()
print("Successfully Downloaded!")
time.sleep(10)
driver.quit()
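Note that the script never tells Chrome where to save the CSV files, so they land in the default Downloads folder. A minimal sketch of pointing downloads at a dedicated folder (the prefs keys are standard Chrome preferences; the folder name is just an example):
import os
from selenium import webdriver
download_dir = os.path.join(os.getcwd(), "gats_reports")   # example folder, adjust as needed
os.makedirs(download_dir, exist_ok=True)
chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_experimental_option("prefs", {
    "download.default_directory": download_dir,   # save the CSV exports here
    "download.prompt_for_download": False,        # skip the save-as dialog
})
driver = webdriver.Chrome(options=chromeOptions)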
Update: here is another solution, covering the case mentioned in the comments where the script has to loop through all the states and all the compliance periods.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
chromeOptions = webdriver.ChromeOptions()
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
url = "https://gats.pjm-eis.com/GATS2/PublicReports/RPSRetiredCertificatesReportingYear"
driver.get(url)
count_state = len(driver.find_elements(By.XPATH, '//table[@id="SelectedState0_DDD_L_LBT"]//tr'))
for i in range(1, count_state + 1):
    wait.until(EC.element_to_be_clickable((By.XPATH, '(//*[@class="dxEditors_edtDropDown_GATS2"])[1]'))).click() # Clicking on Dropdown Arrow Down Icon
    wait.until(EC.element_to_be_clickable((By.XPATH, '(//table[@id="SelectedState0_DDD_L_LBT"]//tr)[' + str(i) + ']'))).click()
    state_name = driver.find_element(By.XPATH, '(//table[@id="SelectedState0_DDD_L_LBT"]//tr/td)[' + str(i) + ']').get_attribute("textContent")
    count_period = len(driver.find_elements(By.XPATH, '//table[@id="ReportingYear0_DDD_L_LBT"]//tr'))
    for j in range(1, count_period + 1):
        wait.until(EC.element_to_be_clickable((By.XPATH, '(//*[@class="dxEditors_edtDropDown_GATS2"])[2]'))).click() # Clicking on Dropdown Arrow Down Icon
        wait.until(EC.element_to_be_clickable((By.XPATH, '(//table[@id="ReportingYear0_DDD_L_LBT"]//tr)[' + str(j) + ']'))).click()
        driver.find_element(By.XPATH, '//*[text()="Submit"]').click()
        wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="CSV0Img"]'))).click()
        compliance_period_name = driver.find_element(By.XPATH, '(//table[@id="ReportingYear0_DDD_L_LBT"]//tr/td)[' + str(j) + ']').get_attribute("textContent")
        print("Successfully Downloaded for State:", state_name, " and Compliance Period: ", str(compliance_period_name))
        print("\n")
time.sleep(10)
driver.quit()
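The final time.sleep(10) only gives the very last export time to finish. If downloads overlap or take longer, a rough helper like this (it relies on Chrome's .crdownload temp-file convention; the folder argument is whatever download directory you configured) can be called after each CSV click:
import glob
import os
import time
def wait_for_downloads(folder, timeout=60):
    # Chrome keeps unfinished downloads as *.crdownload files;
    # return True once none remain, or False after the timeout.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if not glob.glob(os.path.join(folder, "*.crdownload")):
            return True
        time.sleep(1)
    return False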
I am trying to fill a form automatically on a website, but I cannot find any specific identifier for the input fields. The problem starts inside the while loop: the program clicks the button, but after that it cannot find the element. I tried XPath and CSS selectors with no success. The site opens an overlay, but there are no iframe tags in the HTML, so I am stuck.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
usernameStr = 'username'
passwordStr = 'password'
browser = webdriver.Chrome()
browser.get('https://allzinapp.com/vitrin/')
browser.maximize_window()
time.sleep(2)
username = browser.find_element(By.NAME, 'username')
username.send_keys(usernameStr)
password = browser.find_element(By.NAME, 'password')
password.send_keys(passwordStr)
button = browser.find_element(By.XPATH, '/html/body/vaadin-login-overlay-wrapper/vaadin-login-form/vaadin-login-form-wrapper/form/vaadin-button')
button.click()
time.sleep(2)
browser.get('https://allzinapp.com/vitrin/tables')
time.sleep(2)
x = 0
while x < 40:
    tableName = "T{}".format(x)
    plusButton = browser.find_element(By.XPATH, '/html/body/div/flow-container-vitrin-816189098/vaadin-app-layout/vaadin-vertical-layout[2]/vaadin-form-layout[1]/div/vaadin-button[1]')
    plusButton.click()
    WebDriverWait(browser, 20).until(EC.visibility_of_element_located(By.XPATH, button))
    fillSlot = browser.find_element(By.XPATH, '//input[@aria-labelledby="vaadin-text-field-label-3 vaadin-text-field-input-3"]').send_keys('Selam')
    fillSlot.send_keys('Teras')
    fillName = browser.find_element(By.NAME, 'Örn: A1')
    fillName.send_keys(tableName)
    confirmButton = browser.find_element(By.XPATH, '/html/body/vaadin-dialog-overlay/flow-component-renderer/div/vaadin-vertical-layout/vaadin-vertical-layout[1]/vaadin-horizontal-layout/vaadin-button[2]')
    confirmButton.click()
    time.sleep(2)
    x += 1
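For what it's worth, the visibility wait above passes By.XPATH and a WebElement as two separate arguments, but expected_conditions take a single (By, selector) tuple. A sketch of waiting for the Vaadin dialog itself and then searching inside it (assuming the overlay is a regular vaadin-dialog-overlay element in the light DOM and its text fields contain ordinary input elements):
dialog = WebDriverWait(browser, 20).until(
    EC.visibility_of_element_located((By.TAG_NAME, 'vaadin-dialog-overlay')))
# search inside the dialog rather than the whole document
name_input = dialog.find_element(By.CSS_SELECTOR, 'input')  # hypothetical: first text field in the dialog
name_input.send_keys(tableName)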
My Python Selenium code is working fine. I am automating 3 web pages. On the second page I have 5 radio buttons to click. The code works 8-10 times, but after that it gives me this error: selenium.common.exceptions.ElementClickInterceptedException: Message: element click intercepted: Element is not clickable at point (256, 10). Other element would receive the click. It is a bit strange because it works for a while and then just stops with this error. How do I fix this? I have tried the execute_script() method, but it still does not work.
This is the part that gives me the error:
radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[1]/div[2]/div[1]/label[1]/input')
driver.execute_script("arguments[0].click();", radio_1)
radio_1.click()
Here is the full code
from sre_parse import State
from tkinter.tix import Select
from unicodedata import name
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.alert import Alert
import pandas as pd
import xlrd
import time
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
# reading the excel file
df = pd.read_excel('New1Rajnandgaon_List.xlsx')
# looping through all the data
for i in df.index:
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
    alert = Alert(driver)
    time.sleep(3)
    driver.get("https://ssg2021.in/citizenfeedback")
    time.sleep(3)
    # selecting state
    state_select = driver.find_element(By.XPATH, '//*[@id="State"]')
    drp1 = Select(state_select)
    drp1.select_by_visible_text('Chhattisgarh')
    time.sleep(2)
    # selecting district
    district_select = driver.find_element(By.XPATH, '//*[@id="District"]')
    drp2 = Select(district_select)
    drp2.select_by_visible_text('RAJNANDGAON')
    entry = df.loc[i]
    # entering the age
    age = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[1]/div/div/input')
    age.send_keys(str(entry['age']))
    # respondent name
    rs_name = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[1]/div/input')
    rs_name.send_keys(entry['name'])
    # respondent mobile number
    rs_number = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[2]/div/input')
    rs_number.send_keys(str(entry['mobile number']))
    # respondent gender
    gender = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[3]/div/select')
    rs_gender = Select(gender)
    rs_gender.select_by_visible_text('Male')
    # submitting the form
    submit = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[5]/input')
    submit.click()
    time.sleep(2)
    # second page
    # radio button 1
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[1]/div[2]/div[1]/label[1]/input')
    driver.execute_script("arguments[0].click();", radio_1)
    radio_1.click()
    # radio button 2
    radio_2 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[2]/div[2]/div[1]/label[1]')
    radio_2.click()
    # radio button 3
    radio_3 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[3]/div[2]/div[1]/label[1]')
    radio_3.click()
    # radio button 4
    radio_4 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[4]/div[2]/div[1]/label[1]')
    radio_4.click()
    # radio button 5
    radio_5 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[5]/div[2]/div[1]/label[1]')
    radio_5.click()
    submit2 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[2]/input')
    submit2.click()
    # alert
    WebDriverWait(driver, 10).until(EC.alert_is_present())
    alert = driver.switch_to.alert
    alert.accept()
    driver.close()
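For the intercepted-click error above, one pattern that often helps (a sketch, not verified against this particular page) is to wait until the element is clickable, scroll it to the centre of the viewport, and fall back to a JavaScript click only if a normal click is still blocked:
def safe_click(driver, locator, timeout=10):
    # wait, scroll the element to the middle of the viewport, then click;
    # fall back to a JS click if another element still overlaps it
    element = WebDriverWait(driver, timeout).until(EC.element_to_be_clickable(locator))
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
    try:
        element.click()
    except Exception:
        driver.execute_script("arguments[0].click();", element)
    return element
# usage: safe_click(driver, (By.XPATH, '...your radio button XPath...'))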
So I am using Selenium for my web automation. I have an Excel sheet with my data entries. Right now the script fills the form, but it only fills the first entry and then just stops. I have more than 150,000 entries. I want the program to keep running until it has submitted all the entries. How do I do that? Please help!
My entries in Excel
My code
from sre_parse import State
from tkinter.tix import Select
from unicodedata import name
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import pandas as pd
import xlrd
import time
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)
driver.get("https://ssg2021.in/citizenfeedback")
# reading the excel file
df = pd.read_excel('Rajnandgaon_List2.xlsx')
# looping through all the data
for i in df.index:
    time.sleep(3)
    # selecting state
    state_select = driver.find_element(By.XPATH, '//*[@id="State"]')
    drp1 = Select(state_select)
    drp1.select_by_visible_text('Chhattisgarh')
    time.sleep(2)
    # selecting district
    district_select = driver.find_element(By.XPATH, '//*[@id="District"]')
    drp2 = Select(district_select)
    drp2.select_by_visible_text('RAJNANDGAON')
    entry = df.loc[i]
    # entering the age
    age = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[1]/div/div/input')
    age.send_keys(str(entry['age']))
    # respondent name
    rs_name = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[1]/div/input')
    rs_name.send_keys(entry['name'])
    # respondent mobile number
    rs_number = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[2]/div/input')
    rs_number.send_keys(str(entry['mobile number']))
    # respondent gender
    gender = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[3]/div/select')
    rs_gender = Select(gender)
    rs_gender.select_by_visible_text('Male')
    # submitting the form
    submit = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[5]/input')
    submit.click()
    # second page
    # radio button 1
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[1]/div[2]/div[1]/label[1]/input')
    radio_1.click()
    # radio button 2
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[2]/div[2]/div[1]/label[1]')
    radio_1.click()
    # radio button 3
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[3]/div[2]/div[1]/label[1]')
    radio_1.click()
    # radio button 4
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[4]/div[2]/div[1]/label[1]')
    radio_1.click()
    # radio button 5
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[5]/div[2]/div[1]/label[1]')
    radio_1.click()
    submit2 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[2]/input')
    submit2.click()
Your problem can be solved using the code below:
from sre_parse import State
from tkinter.tix import Select
from unicodedata import name
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import pandas as pd
import xlrd
import time
import openpyxl
# reading the excel file
ExcelPath = 'Excel_Path'
df = pd.read_excel(ExcelPath)
wb = openpyxl.load_workbook(ExcelPath)
ws = wb.worksheets[0]
maxrow = ws.max_row
# looping through all the data
for i in range(2, maxrow + 1):
    # a new browser opens for every row
    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    driver = webdriver.Chrome(options=chrome_options)
    driver.get("https://ssg2021.in/citizenfeedback")
    time.sleep(3)
    # selecting state
    state_select = driver.find_element(By.XPATH, '//*[@id="State"]')
    drp1 = Select(state_select)
    drp1.select_by_visible_text('Chhattisgarh')
    time.sleep(2)
    # selecting district
    district_select = driver.find_element(By.XPATH, '//*[@id="District"]')
    drp2 = Select(district_select)
    drp2.select_by_visible_text('RAJNANDGAON')
    entry = df.loc[i - 2]  # sheet data starts at row 2, while the dataframe index starts at 0
    # entering the age
    age = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[1]/div/div/input')
    age.send_keys(str(entry['age']))
    # respondent name
    rs_name = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[1]/div/input')
    rs_name.send_keys(entry['name'])
    # respondent mobile number
    rs_number = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[2]/div/input')
    rs_number.send_keys(str(entry['mobile number']))
    # respondent gender
    gender = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[4]/div[2]/div/div[3]/div/select')
    rs_gender = Select(gender)
    rs_gender.select_by_visible_text('Male')
    # submitting the form
    submit = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[1]/form/div[5]/input')
    submit.click()
    # second page
    # radio button 1
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[1]/div[2]/div[1]/label[1]/input')
    radio_1.click()
    # radio button 2
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[2]/div[2]/div[1]/label[1]')
    radio_1.click()
    # radio button 3
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[3]/div[2]/div[1]/label[1]')
    radio_1.click()
    # radio button 4
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[4]/div[2]/div[1]/label[1]')
    radio_1.click()
    # radio button 5
    radio_1 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[1]/div[5]/div[2]/div[1]/label[1]')
    radio_1.click()
    submit2 = driver.find_element(By.XPATH, '//*[@id="zed_user_form"]/div/div[1]/div[2]/div/div/div[2]/form/div[2]/input')
    submit2.click()
    # after clicking the submit button the browser quits
    driver.quit()
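Opening and quitting a fresh browser for every row works, but it is slow for 150,000 entries. A lighter variant (just a sketch; the fill-and-submit steps are the same as above) keeps one driver and simply reloads the form at the top of each iteration:
driver = webdriver.Chrome()
for i in range(len(df)):
    entry = df.iloc[i]
    driver.get("https://ssg2021.in/citizenfeedback")   # reload a blank form for this row
    # ... fill the fields, click the radio buttons and submit, exactly as above ...
driver.quit()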
I have a dataframe that contains links to Google reviews of two restaurants. I want to load all the reviews of the two restaurants (one by one) in the browser and then save them into a new dataframe. I wrote a script that reads the links and loads all the reviews in the browser, as follows:
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
import glob  # used below when checking for an existing CSV file
link_df (a dataframe with a single 'Link' column):
0 https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]
1 https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]
i = 0
driver = webdriver.Chrome()
for index, i in link_df.iterrows():
    base_url = i['Link']  # link_df['Link'][i]
    driver.get(base_url)
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Newest']]"))).click()
    print('Restaurant number is ', index)
    title = driver.find_element_by_xpath("//div[@class='P5Bobd']").text
    address = driver.find_element_by_xpath("//div[@class='T6pBCe']").text
    overall_rating = driver.find_element_by_xpath("//div[@class='review-score-container']//span[@class='Aq14fc']").text
    total_reviews_text = driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
    num_reviews = int(total_reviews_text.split()[0])
    all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
    time.sleep(2)
    total_reviews = len(all_reviews)
    while total_reviews < num_reviews:
        driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
        WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
        time.sleep(5)
        all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div.gws-localreviews__google-review')))
        print(total_reviews)
        total_reviews += 1
    reviews_info = driver.find_elements_by_xpath("//div[@class='jxjCjc']")
    review_information = pd.DataFrame(columns=["Restaurant title", "Restaurant rating", "Total reviews", "Reviewer Name", "Rating", "Review"])
    name = ''
    rating = ''
    text = ''
    for index, review_info in enumerate(reviews_info):
        name = review_info.find_element_by_xpath("./div/div/a").text
        rating = review_info.find_element_by_xpath(".//div[@class='PuaHbe']//g-review-stars//span").get_attribute('aria-label')
        text = review_info.find_element_by_xpath(".//div[@class='Jtu6Td']//span").text
        review_information.at[len(review_information)] = [title, overall_rating, num_reviews, name, rating, text]
    filename = 'Google_reviews' + ' ' + pd.to_datetime("now").strftime("%Y_%m_%d") + '.csv'
    files_present = glob.glob(filename)
    if files_present:
        review_information.to_csv(filename, index=False, mode='a', header=False)
    else:
        review_information.to_csv(filename, index=False)
    driver.get('https://www.google.com')
    time.sleep(3)
The problem is that the script throws an error when it reaches the following line:
driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
It throws the following error:
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=95.0.4638.69)
When I tried the same program without storing the Google links in a dataframe (i.e. no for loop, and with base_url hard-coded to a single Google review link instead of base_url = i['Link']), it works fine.
I am not sure where I am making the mistake. Any suggestion or help to fix the issue would be highly appreciated.
EDIT
You put the creation of the driver outside the for loop.
You can't launch the next URL with GPS data while the first popup is still in front; if you do, it stays in the background. The easier way is to load a URL without GPS data (for example https://www.google.com) and wait about 3 seconds before continuing the loop.
Your count is not right; I have changed your selector, changed how the total is computed, and commented out some lines.
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.firefox.options import Options
import time
link_df = ["https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]",
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]"
]
i = 0
binary = r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe'
cap = DesiredCapabilities().FIREFOX
cap["marionette"] = True
options = Options()
options.binary = binary
driver = webdriver.Firefox(options=options, capabilities=cap, executable_path="E:\\Téléchargement\\geckodriver.exe")
# I have to launch it once to accept the cookies manually
# (by setting a breakpoint afterwards), but you may not have that issue
#driver.get(link_df[0])
print ("Headless Firefox Initialized")
print(link_df)
for url in link_df:
    base_url = url  # i['Link'] # link_df['Link'][i]
    print(base_url)
    driver.get(base_url)
    # "Avis les plus récents" is the French-locale label for "Newest"
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Avis les plus récents']]"))).click()
    title = driver.find_element_by_xpath("//div[@class='P5Bobd']").text
    address = driver.find_element_by_xpath("//div[@class='T6pBCe']").text
    overall_rating = driver.find_element_by_xpath("//div[@class='review-score-container']//span[@class='Aq14fc']").text
    total_reviews_text = driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
    num_reviews = int(total_reviews_text.split()[0])
    all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
    # time.sleep(2)
    total_reviews = 0
    while total_reviews < num_reviews:
        driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
        WebDriverWait(driver, 5, 0.25).until_not(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[class$="activityIndicator"]')))
        all_reviews = WebDriverWait(driver, 5).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
        total_reviews = len(all_reviews)
        print(total_reviews, len(all_reviews))
    driver.get('https://www.google.com')  # or driver.close() if no bugs
    time.sleep(3)
driver.close()
driver.quit()
It seems the solution for Chrome needs some fixes:
org.openqa.selenium.StaleElementReferenceException: stale element reference: element is not attached to the page document
The literal meaning is that the referenced element is out of date and no longer attached to the current page. Usually this is because the page has been refreshed or navigated away from; the solution is to reuse findElement or findElements to locate the element again.
So it seems that with Chrome there is a refresh problem. I suggest re-loading the list of review elements before scrolling, to have a fresh copy of the DOM items, and I had to add a 1-second wait at the end of the while loop.
from selenium import webdriver
import pandas as pd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
#from selenium.webdriver.firefox.options import Options
from selenium.webdriver.chrome.options import Options
import time
link_df = [
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318a3aa3041455:0x5f83f4fae76d8656,1,,,&rlfi=hd:;si:6882614014013965910,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEiglZKhm6qAgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSARJidXJtZXNlX3Jlc3RhdXJhbnSqAQwQASoIIgRmb29kKAA,y,UB2auy7TMYs;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]",
"https://www.google.com/search?q=restaurant+in+christchurch&biw=1280&bih=614&hotel_occupancy=2&tbm=lcl&sxsrf=AOaemvI4qlEAr3btedb6PCx9U53RtXkI2Q%3A1635630947742&ei=Y799YaHfLOKZ4-EPoeqjmA4&oq=restaurant+in+christchurch&gs_l=psy-ab.3...0.0.0.614264.0.0.0.0.0.0.0.0..0.0....0...1c..64.psy-ab..0.0.0....0.7jAOI05vCjI#lrd=0x6d318bf82139caaf:0xf115cd7fe794cbcc,1,,,&rlfi=hd:;si:17372017086881385420,l,ChpyZXN0YXVyYW50IGluIGNocmlzdGNodXJjaEjh9auu-q6AgAhaKBAAGAAYAiIacmVzdGF1cmFudCBpbiBjaHJpc3RjaHVyY2gqBAgDEACSAQpyZXN0YXVyYW50qgEMEAEqCCIEZm9vZCgA,y,ZeJbBWd7wDg;mv:[[-43.4870861,172.6509735],[-43.5490232,172.5976049]]"
]
i = 0
binaryfirefox = r'C:\Program Files (x86)\Mozilla Firefox\firefox.exe'
binarychrome = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
options = Options()
#cap = DesiredCapabilities().CHROME
#cap["marionette"] = True
#cap = DesiredCapabilities().FIREFOX
#options.binary = binaryfirefox
#driver = webdriver.Firefox(options=options, capabilities=cap, executable_path="E:\\Téléchargement\\geckodriver.exe")
options.binary_location = binarychrome
driver = webdriver.Chrome(options=options, executable_path="E:\\Téléchargement\\chromedriver.exe" )
# same reason as with Firefox: I have to load a URL once
# to accept the cookies manually
#driver.get(link_df[0])
print(link_df)
for url in link_df:
    base_url = url  # i['Link'] # link_df['Link'][i]
    print(base_url)
    driver.get(base_url)
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[./span[text()='Newest']]"))).click()
    title = driver.find_element_by_xpath("//div[@class='P5Bobd']").text
    address = driver.find_element_by_xpath("//div[@class='T6pBCe']").text
    overall_rating = driver.find_element_by_xpath("//div[@class='review-score-container']//span[@class='Aq14fc']").text
    total_reviews_text = driver.find_element_by_xpath("//div[@class='review-score-container']//div//div//span//span[@class='z5jxId']").text
    num_reviews = int(total_reviews_text.split()[0])
    all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
    # time.sleep(2)
    total_reviews = 0
    while total_reviews < num_reviews:
        # reload to avoid the stale-element exception (or wrap the scroll in try/except, but that is more expensive)
        all_reviews = WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#reviewSort .gws-localreviews__google-review')))
        driver.execute_script('arguments[0].scrollIntoView(true);', all_reviews[-1])
        total_reviews = len(all_reviews)
        print(total_reviews, len(all_reviews))
        time.sleep(1)
    driver.get('https://www.google.com')  # or driver.close() if no bugs
    time.sleep(3)
driver.close()
driver.quit()