I'm using Python for webscparing but the page has to be scrolled to load all the content so I use selenium.
I could make the first part work so the web driver launches, presses accept cookies and scrolls x times (in the code above is 2 times because I had to wait 5 minutes to get a blank list T_T)
from msilib.schema import Class
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
url = "https://www.aboutyou.es/c/hombre/zapatos-20215"
opt = webdriver.ChromeOptions()
opt.add_argument("start-maximized")
driver = webdriver.Chrome(options = opt)
driver.get(url)
cookies = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, '//button[#id="onetrust-accept-btn-handler"]'))
).click()
#element = driver.find_element(By.XPATH, '//button[#data.testid="loadMoreButton_100"]')
html = driver.find_element(By.TAG_NAME, 'html')
intentos = 2
try:
mas = WebDriverWait(driver, 15).until(
EC.presence_of_element_located((By.XPATH, '//button[#data-testid="loadMoreButton_100"]'))
).click()
except:
html.send_keys(Keys.PAGE_DOWN)
html.send_keys(Keys.PAGE_DOWN)
html.send_keys(Keys.PAGE_DOWN)
for i in range(intentos):
try:
mas = WebDriverWait(driver, 1).until(
EC.presence_of_element_located((By.XPATH, '//button[#data-testid="loadMoreButton_100"]'))
).click()
except:
html.send_keys(Keys.PAGE_DOWN)
html.send_keys(Keys.PAGE_DOWN)
html.send_keys(Keys.PAGE_DOWN)
html.send_keys(Keys.PAGE_DOWN)
if i < intentos - 1:
continue
grid_grande = driver.find_elements(By.XPATH,'//a[class="sc-16ol3xi-0 sc-163x4qs-0 fybchu loqbdm sc-nlxe42-2 fwTCrr"]')
print(grid_grande)
The element I want to select is the grid that contains all the other data, but I only get a blank list []:
<a data-testid="productTile-4218512" style="--product-tile-contents-height:112px" class="sc-16ol3xi-0 sc-163x4qs-0 fybchu loqbdm sc-nlxe42-2 fwTCrr" href="/p/panama-jack/botas-con-cordones-4218512"><div data-testid="productImage" class="sc-mt3y39-0 iYaafh">
<img height="100%" width="100%" decoding="async" importance="auto" loading="lazy" sizes="(max-width: 767px) calc(100vw / 3), calc(100vw / 4)" srcset="https://cdn.aboutstatic.com/file/8742f6c70de3baecb60acc24c5f5d3d7?brightness=0.96&quality=75&trim=1&height=160&width=120 120w, https://cdn.aboutstatic.com/file/8742f6c70de3baecb60acc24c5f5d3d7?brightness=0.96&quality=75&trim=1&height=480&width=360 360w, https://cdn.aboutstatic.com/file/8742f6c70de3baecb60acc24c5f5d3d7?brightness=0.96&quality=75&trim=1&height=534&width=400 400w, https://cdn.aboutstatic.com/file/8742f6c70de3baecb60acc24c5f5d3d7?brightness=0.96&quality=75&trim=1&height=800&width=600 600w, https://cdn.aboutstatic.com/file/8742f6c70de3baecb60acc24c5f5d3d7?brightness=0.96&quality=75&trim=1&height=1067&width=800 800w, https://cdn.aboutstatic.com/file/8742f6c70de3baecb60acc24c5f5d3d7?brightness=0.96&quality=75&trim=1&height=1280&width=960 960w" style="border-radius:2px" alt="PANAMA JACK - Botas con cordones en marrón: frente" data-testid="productImageView" class="sc-1876d5f-0-Component giShmP">
<div class="sc-1i699m5-0 eHwLkT"><div data-testid="badge-GENERIC" class="sc-1dqvaay-1 cHhMjt">Más sostenible</div></div>
</div><button type="button" data-testid="wishListButton" class="sc-1yegbck-0 cFfJJS sc-122ag38-0 eHyXBK sc-1ytk4ze-1 jrOzwu sc-1cy39j4-0 eCBNan"><svg class="sc-vu2m91-0 cXGjqJ sc-1ytk4ze-0 ebHRsM" data-testid="WishListIcon"><use xlink:href="#/assets/media/ic-heart.e31e11e8.svg"></use></svg><div class="sc-122ag38-1 ixqHjB"></div></button><div class="sc-nlxe42-0 kRHZwU"><div class="sc-1qsfqrd-0 xHpAu"><p data-testid="brandName" class="sc-1vt6vwe-0 sc-1vt6vwe-2 sc-1qsfqrd-1 dmJKga cyVcre gtGpeQ">PANAMA JACK</p><div class="sc-18q4lz4-2 cySBlJ sc-1qsfqrd-6 khWqDb" data-testid="priceBox"><span data-testid="finalPrice" class="sc-2qclq4-0 sc-18q4lz4-0 ePNAqF fbtbBY">169,00 €</span></div><div class="sc-1qsfqrd-7 eUQMHN"><ul data-testid="ColorContainer" class="sc-1qsfqrd-3 eSoPTy">
<li data-testid="ColorBubble-simple-#663300" class="sc-kt3zrg-0 sc-kt3zrg-1 jEkiIS dhRoGM sc-1qsfqrd-8 dYSOSZ"></li><li data-testid="ColorBubble-simple-#000000" class="sc-kt3zrg-0 sc-kt3zrg-1 jEkiIS gmeSfI sc-1qsfqrd-8 dYSOSZ"></li><li data-testid="ColorBubble-simple-#663300" class="sc-kt3zrg-0 sc-kt3zrg-1 jEkiIS dhRoGM sc-1qsfqrd-8 dYSOSZ"></li><li data-testid="ColorBubble-simple-#4c2002" class="sc-kt3zrg-0 sc-kt3zrg-1 jEkiIS hFwoRv sc-1qsfqrd-8 dYSOSZ"></li><li class="sc-1qsfqrd-4 glNrlz">+<!-- -->2</li></ul><span data-testid="Sizes" class="sc-1qsfqrd-5 gZDHxk">Disponible en muchas tallas</span></div></div></div></a>
wait=WebDriverWait(driver,60)
driver.get("https://www.aboutyou.es/c/hombre/zapatos-20215")
wait.until(EC.element_to_be_clickable((By.XPATH, "//button[#id='onetrust-accept-btn-handler']"))).click()
wait.until(EC.element_to_be_clickable((By.XPATH, "//*[#id='modalContent']/div[1]/*[name()='svg']"))).click()
SCROLL_PAUSE_TIME = 3
# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
# Scroll down to bottom
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Wait to load page
time.sleep(SCROLL_PAUSE_TIME)
# Calculate new scroll height and compare with last scroll height
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
elems=wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//a[starts-with(#data-testid,'productTile')]")))
for elem in elems:
print(elem.get_attribute('outerHTML'))
I'm not sure what your expected output is so I just grabbed all the a tags with those product tiles.
The key issue would be waiting for visibility of your elements to come up and then grabbing the data.
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
Related
I am trying to scrape odds from https://en.stoiximan.gr/live. While my code is working, I get an error for having uneven lists in my final dataframe. Unfortunately, stoiximan seems to place 3-way odds together with over/under odds and suspended/locked matches (as in the picture).
What I am trying to do is to delete both home and away teams from their respective lists if their odds are over/under or locked. Any suggestions?
Here 's my code so far:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
import openpyxl
import os
#launch chrome and keep window open
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
#visit en.stoiximan.gr and maximize window
driver.get("https://en.stoiximan.gr/live/")
driver.maximize_window()
#close modal window
try:
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((
By.XPATH, "//button[#class='sb-modal__close__btn uk-modal-close-default uk-icon uk-
close']"
))).click()
except:
pass
#accept cookies
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((
By.ID, "onetrust-accept-btn-handler"
))).click()
#Initialize storage for stoiximan
stoiximan_home_teams_list = []
stoiximan_away_teams_list = []
stoiximan_home_odds_list = []
stoiximan_draw_odds_list = []
stoiximan_away_odds_list = []
#grab all home/away teams and explicit odds
try:
stoiximan_home_teams = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-
row__container--row']/div[1]/a/div[1]/div[1]/span"))
)
stoiximan_away_teams = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[1]/a/div[1]/div[2]/span"))
)
stoiximan_home_odds = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[1]/span[2]"))
)
stoiximan_draw_odds = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[2]/span[2]"))
)
stoiximan_away_odds = WebDriverWait(driver, 1).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[3]/span[2]"))
)
except:
driver.quit()
#loop each home team and append the lists
for stoiximan_home_team in stoiximan_home_teams:
stoiximan_home_teams_list.append(stoiximan_home_team.get_attribute('innerText'))
for stoiximan_away_team in stoiximan_away_teams:
stoiximan_away_teams_list.append(stoiximan_away_team.get_attribute('innerText'))
for stoiximan_home_odd in stoiximan_home_odds:
stoiximan_home_odds_list.append(stoiximan_home_odd.text)
for stoiximan_draw_odd in stoiximan_draw_odds:
stoiximan_draw_odds_list.append(stoiximan_draw_odd.text)
for stoiximan_away_odd in stoiximan_away_odds:
stoiximan_away_odds_list.append(stoiximan_away_odd.text)
print(stoiximan_home_teams_list)
print(len(stoiximan_home_teams_list))
print(stoiximan_away_teams_list)
print(len(stoiximan_away_teams_list))
print(stoiximan_home_odds_list)
print(len(stoiximan_home_odds_list))
print(stoiximan_draw_odds_list)
print(len(stoiximan_draw_odds_list))
print(stoiximan_away_odds_list)
print(len(stoiximan_away_odds_list))
#make str to float in odds lists
stoiximan_home_odds_list_float = [float(i) for i in stoiximan_home_odds_list]
stoiximan_draw_odds_list_float = [float(j) for j in stoiximan_draw_odds_list]
stoiximan_away_odds_list_float = [float(k) for k in stoiximan_away_odds_list]
#create dictionary for data
stoiximan_dict = {'Stoiximan Home Team': stoiximan_home_teams_list,
'Stoiximan Away Team': stoiximan_away_teams_list,
'Stoiximan Home Odd': stoiximan_home_odds_list_float,
'Stoiximan Draw Odd': stoiximan_draw_odds_list_float,
'Stoiximan Away Odd': stoiximan_away_odds_list_float
}
#create dataframe for data
df4 = pd.DataFrame(stoiximan_dict)
print(df4)
#write to excel file and open it
df4.to_excel(r'C:\Users\sweet_000\Desktop\data.xlsx', sheet_name="stoiximan", index=False)
os.system('start EXCEL.EXE "C:\\Users\\sweet_000\\Desktop\\data.xlsx"')
driver.quit()
Whenever I want selenium to press enter for me, it doesn't want to, get to the next page.
Is something wrong with the code?
from selenium import webdriver
from selenium.webdriver.common import keys
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time
PATH = "C:\Pro\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://insurify.com")
try:
search = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.ID, "zipcodeInput"))
)
search.send_keys('34997')
search.send_keys(Keys.RETURN)
element1 = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span > input:nth-child(2)"))
)
element1.send_keys("2016")
element1.send_keys(Keys.RETURN)
time.sleep(30)
element2 = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span.twitter-typeahead > input:nth-child(2)"))
)
element2.send_keys('BMW')
element2.send_keys(Keys.RETURN)
element3 = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span.twitter-typeahead > input:nth-child(2)"))
)
element3.send_keys('4-Series')
element3.send_keys(Keys.RETURN)
element4 = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span.twitter-typeahead > input:nth-child(2)"))
)
element4.send_keys('428i')
element4.send_keys(Keys.RETURN)
time.sleep(50)
except:
driver.quit
Also there's a picture for the last code execution of the code.
By running driver.implicitly_wait(30) right after the definition of driver, we can get rid of all the commands WebDriverWait(driver, 30).until(EC.presence_of_element_located((...))). Moreover, with a proper use of find_element() and click() we can replace the blocks of code such as
element1 = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span > input:nth-child(2)")))
element1.send_keys("2016")
element1.send_keys(Keys.RETURN)
with a one line command. The final code is
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
driver = webdriver.Chrome(service=Service(your_chromedriver_path))
driver.implicitly_wait(30)
driver.get("https://insurify.com")
driver.find_element(By.CSS_SELECTOR, '#zipcodeInput').send_keys('34997')
driver.find_element(By.XPATH, '//button[text()="View my quotes"]').click()
driver.find_element(By.XPATH, '//div[text() = "2016"]').click()
driver.find_element(By.XPATH, '//span[text()="BMW"]').click()
driver.find_element(By.XPATH, '//div[text()="4-Series"]').click()
driver.find_element(By.XPATH, '//div[text() = "428i"]').click()
When I send text manually, suggestion area provided by first textbox , it works fine. But when I send the text using selenium library it cannot able to select the option from suggestion area, although that option is present inside the textbox. Is there any one who can help me out of that.
import time
from selenium.webdriver.support.ui import Select
path=r"C:\Users\AbdulRehman\Downloads\chromedriver_win32\chromedriver.exe"
# driver = webdriver.Chrome(path)
options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get("https://www4.sii.cl/mapasui/internet/#/contenido/index.html")
try:
element = WebDriverWait(driver, 1000).until(
EC.presence_of_element_located((By.XPATH, '//*[#id="ng-app"]/body/div[5]/div/div/div[3]/div/button'))
)
element.click()
print("prints its working fine now ..")
element = WebDriverWait(driver, 1000).until(
EC.presence_of_element_located((By.XPATH,'//*[#id="titulo"]/div[8]/i'))
)
element.click()
# element = WebDriverWait(driver, 1000).until(
# EC.presence_of_element_located((By.XPATH,'//*[#id="rolsearch"]/div[2]/div[1]/input'))
# )
# element.send_keys("PEDRO AGUIRRE CERD"+Keys.ENTER)
# search = WebDriverWait(driver, 60).until(
# EC.visibility_of_element_located((By.XPATH, '//*[#id="rolsearch"]/div[2]/div[1]/input'))
# )
# search.send_keys("EL MONTE" + Keys.ENTER)
# time.sleep(3)
search = WebDriverWait(driver, 60).until(
EC.visibility_of_element_located((By.XPATH, '//*[#id="rolsearch"]/div[2]/div[1]/input'))
)
ActionChains(driver).click(on_element=search).send_keys("EL MONTE").send_keys(Keys.ENTER).perform()
suggestion = WebDriverWait(driver, 60).until(
EC.visibility_of_element_located((By.XPATH, '//strong[text()="EL MONTE"]'))
)
suggestion.click()
# auto_complete = driver.find_elements_by_xpath('//*[#id="rolsearch"]/div[2]/div[1]/input')
# auto_complete[0].click()
# auto_complete.send_keys(Keys.RETURN)
# element.send_keys("somehting in text")
# search = driver.find_element_by_xpath().click()
# search.send_keys(Keys.RETURN)
search_1 = driver.find_element_by_xpath('//*[#id="rolsearch"]/div[2]/div[2]/input')
search_1.send_keys("PEDRO AGUIRRE CERDA")
search_1.send_keys(Keys.RETURN)
search_2 = driver.find_element_by_xpath('//*[#id="rolsearch"]/div[2]/div[3]/input')
search_2.send_keys("somehting in text")
search_2.send_keys(Keys.RETURN)
print("Its also working now ......")
time.sleep(3)
except Exception as e:
print(e)
driver.quit()
The desired element is a Angular element, so to send a character sequence to the element you need to induce WebDriverWait for the element_to_be_clickable() and you can use the following Locator Strategy:
Using XPATH:
driver.get('https://www4.sii.cl/mapasui/internet/#/contenido/index.html')
WebDriverWait(driver, 60).until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Aceptar']"))).click()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//i[#data-ng-click='mostrarBusquedaRol()']"))).click()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[contains(#data-ng-include, '/mapasui/common/_content/busqueda-rol.html')]//div[#id='rolsearch']//label[contains(., 'Comuna')]//following::input[1]"))).send_keys("PEDRO AGUIRRE CERD" + Keys.ENTER)
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Browser Snapshot:
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import pandas as pd
class FindByXpathCss():
# Declaring variables
Reviews = [] # List to store final set of reviews
reviewText = [] # List to store reviews extracted from XPath
reviewFullText = []
# Chromedriver path
driver = webdriver.Chrome(executable_path=r"F:\Chrome-webdriver\chromedriver.exe")
driver.maximize_window()
baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
driver.get(baseUrl)
# driver.execute_script("scrollBy(0,300);")
# Scrolling down
for i in range(20):
driver.find_element_by_xpath('//*[#id="yDmH0d"]').send_keys(Keys.ARROW_DOWN, i)
time.sleep(0.5)
# To click on Show more button
#btnShowMore = driver.find_element_by_xpath('//*[#id="fcxH9b"]/div[4]/c-wiz/div/div[2]''/div/div[1]/div/div/div[1]/div[2]/div[2]/div/span/span').click()
# Scrolling to top
for j in range(10):
driver.find_element_by_xpath('//*[#id="yDmH0d"]').send_keys(Keys.ARROW_UP, j)
#for i in range(10):
review_btn = driver.find_elements_by_xpath("//button[contains(#class,'')][contains(text(),'Full Review')]")
single_review_btn = driver.find_element_by_xpath("//button[contains(#class,'')][contains(text(),'Full Review')]")
#time.sleep(1)
The div html tag having 2 tags, one is having jsname as 'fbQN7e' which is there for holding the bigger reviews and those reviews will have button called "Full Review". Another one span within the same div html tag is 'bN97Pc' which is there to hold smaller reviews which wont have 'Full review' button at the end of this review. I couldn't get reviews of both types of span. Here I tried to write reviewFullText list directly to dataframe, but getting only element datatype, not text. I don't know why this too happening.
for btn in review_btn:
btn.click()
reviewFullText = driver.find_elements_by_css_selector("span[jsname='fbQN7e']")
#if(single_review_btn.is_enabled()==False):
#reviewText = driver.find_elements_by_css_selector("span[jsname=\"bN97Pc\"]")
##else:
#pass
# Iterating each reviews and appending into list Reviews
for txtreview in reviewText:
reviewFullText.append(txtreview.text)
print(len(reviewFullText))
# Writing the list values into csv file
df = pd.DataFrame(reviewFullText)
#df = pd.DataFrame({'Reviews': 'Reviews'}) #'Sentiment': 'null'})
df.to_csv('Reviews.csv', index=True, encoding='utf-8')
driver.close()
I have modified your solution to retrieve all review from the page.
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
class FindByXpathCss():
driver = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
driver.maximize_window()
baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
driver.get(baseUrl)
scrolls = 3
while True:
scrolls -= 1
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(3)
if scrolls < 0:
break
buttonClick = WebDriverWait(driver, 30).until(
EC.visibility_of_all_elements_located((By.XPATH, "//button[contains(#class,'')][contains(text(),'Full Review')]")))
for element in buttonClick:
driver.execute_script("arguments[0].click();", element)
reviewText = WebDriverWait(driver, 30).until(
EC.presence_of_all_elements_located((By.XPATH, "//*[#class='UD7Dzf']")))
for textreview in reviewText:
print textreview.text
reviewText = WebDriverWait(driver, 30).until(
EC.presence_of_all_elements_located((By.XPATH, "//*[#class='UD7Dzf']")))
# reviewText = driver.find_elements_by_xpath("//*[#class='UD7Dzf']")
for textreview in reviewText:
print textreview.text
Output:
from time import sleep
from webbrowser import Chrome
import selenium
from bs4 import BeautifulSoup as bsoup
import pandas as pd
from selenium import webdriver
class FindByXpathCss():
def test(self):
baseUrl = "https://play.google.com/store/apps/details?
id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
driver = webdriver.Chrome("F:\\Chrome-webdriver\\chromedriver")
driver.maximize_window()
driver.get(baseUrl)
Here I need to click on one button (Full review) to view the full review text.
fullReviewbtn = driver.find_element_by_css_selector('#fcxH9b > div.WpDbMd > c-wiz > div >
div.ZfcPIb > div > div.JNury.Ekdcne > div > div > div.W4P4ne > div:nth-child(2) > div >
div:nth-child(2) > div > div.d15Mdf.bAhLNe > div.UD7Dzf > span:nth-child(1) > div >
button').click()
sleep(1)
Here we are reading that full review text using an xpath, but I wish to read all other reviews of
the app, around 1200 reviews for this app alone. I wish to know how can i iterate it using for
loop here.
elementByXpath = driver.find_element_by_xpath('//*
[#id="fcxH9b"]/div[4]/c-wiz/div/div[2]/div/div[1]/div/div/div[1]/div[2]/div/div[2]/div/div[2]/div[2]').text
if elementByXpath is not None:
print("We found an element using Xpath")
#Review = elementByXpath.get_attribute("Review")
print(elementByXpath)
driver.close()
ff = FindByXpathCss()
ff.test()
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
class FindByXpathCss():
driver = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
driver.maximize_window()
baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
driver.get(baseUrl)
scrolls = 15
while True:
scrolls -= 1
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(3)
if scrolls < 0:
break
elemtn = WebDriverWait(driver, 30).until(
EC.element_to_be_clickable((By.XPATH, "//span[contains(#class,'RveJvd snByac')]")))
elemtn.click()
scrolls = 5
while True:
scrolls -= 1
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
time.sleep(3)
if scrolls < 0:
break
elemtn = WebDriverWait(driver, 30).until(
EC.element_to_be_clickable((By.XPATH, "//span[contains(#class,'RveJvd snByac')]")))
elemtn.click()
reviewText = WebDriverWait(driver, 30).until(
EC.presence_of_all_elements_located((By.XPATH, "//*[#class='UD7Dzf']")))
# reviewText = driver.find_elements_by_xpath("//*[#class='UD7Dzf']")
for textreview in reviewText:
print textreview.text