I'm trying to scrape Bet365 for its live soccer odds. I'm iterating over a list of live matches; for each match I click on it and get directed to new content with all the detailed odds info. From there, it crashes when I go back to continue my iteration.
It throws this error:
Traceback (most recent call last):
File "/Users/christian/Google Drev/Data Science/Bet365/main.py", line 32, in <module>
getScoreH = game.find_element_by_css_selector(scoreH).text
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 430, in find_element_by_css_selector
return self.find_element(by=By.CSS_SELECTOR, value=css_selector)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 654, in find_element
{"using": by, "value": value})['value']
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/webelement.py", line 628, in _execute
return self._parent.execute(command, params)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/webdriver.py", line 320, in execute
self.error_handler.check_response(response)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=69.0.3497.100)
(Driver info: chromedriver=2.42.591059 (a3d9684d10d61aa0c45f6723b327283be1ebaad8),platform=Mac OS X 10.14.0 x86_64)
The error comes from the last few lines of code in my main.py:
# HERE IT BREAKS!:
# Redirects to a games detailed odds page
game.find_element_by_css_selector(oddsBtn).click()
time.sleep(5)
# Go back and click the next game's details.
obj.find_element(overview).click()
time.sleep(5)
Below is my program. But as I said, the problem comes from the last few lines of main.py, where I need to go back and continue the iteration. It seems like it doesn't remember where I left off.
cls_scraper.py:
"""
Class to find element(s) by css selector
"""
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import WebDriverException  # caught in element_css below
import platform
import time
import os
class Scraper():
    def __init__(self, driver):
        self.driver = driver

    def wait(self, element):
        return WebDriverWait(self.driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, element)))

    def element_exist_css(self, element):
        try:
            self.driver.find_element_by_css_selector(element)
        except NoSuchElementException:
            print("Element doesn't exist")
            return False
        return True

    def element_css(self, element):
        try:
            time.sleep(2)
            return WebDriverWait(self.driver, 10).until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, element)))
        except StaleElementReferenceException:
            print("XB: StaleElementReferenceException")
        except WebDriverException:
            print("XB: WebDriverException")

    def find_elements(self, element):
        time.sleep(2)
        return self.driver.find_elements_by_css_selector(element)

    def find_element(self, element):
        time.sleep(2)
        return self.driver.find_element_by_css_selector(element)
str_elements.py:
"""
String library to have relevant css selector elements in one place.
"""
""" BET 365 - Overview-page """
# Enter the page
enterPage = '#TopPromotionMainArea'
# Page with live odds
inPlay = 'body > div:nth-child(1) > div > div:nth-child(1) > div > div.hm-HeaderModule_Primary > div.hm-BigButtons > nav > a:nth-child(2)'
# Element containing the relevant games and info about time, score etc., see below.
games = 'div.ipo-FixtureRenderer.ipo-Competition_Container > div'
# For each game in games, these elements can be found:
teamH = 'div.ipo-TeamStack > div:nth-child(1)'
teamA = 'div.ipo-TeamStack > div:nth-child(2)'
scoreH = 'div.ipo-TeamPoints_TeamScore.ipo-TeamPoints_TeamScore-teamone'
scoreA = 'div.ipo-TeamPoints_TeamScore.ipo-TeamPoints_TeamScore-teamtwo'
gameTime = 'div.ipo-InPlayTimer'
# The redirection in order to get all kinds of odds from a match
# The main overview page only shows a part of it.
oddsBtn = 'div.ipo-FixtureEventCountButton_EventCountWrapper'
# The overview tab to see all the live games
overview = 'div.ip-ControlBar > span.ip-ControlBar_ButtonBar > div:nth-child(1)'
# Choose english language
langTab = 'body > div:nth-child(1) > div > div:nth-child(1) > div > div.hm-HeaderModule_Secondary > div.hm-HeaderModule_Menus > div.hm-LanguageDropDownSelections.hm-DropDownSelections > a'
pickEng = 'body > div:nth-child(1) > div > div:nth-child(1) > div > div.hm-HeaderModule_Secondary > div.hm-HeaderModule_Menus > div.hm-LanguageDropDownSelections.hm-DropDownSelections.hm-DropDownSelections_Selected > div > div > a:nth-child(1)'
# Get a better overview
allMarkets = 'body > div:nth-child(1) > div > div.wc-PageView > div.wc-PageView_Main.wc-InPlayPage_MainContainer > div > div > div.ipo-OverViewView > div > div > div > div.ipo-OverViewDetail > div.ipo-OverViewDetail_Container.ipo-Classification > div.ipo-ClassificationHeader_Header.ipo-ClassificationHeader_Header-1.ipo-ClassificationHeader_Header-lightgreenborder.ipo-ClassificationHeader_Header-moremarkets > div.ipo-ClassificationHeader_MarketsButtonOuterWrapper > div > div.ipo-ClassificationHeader_MarketsButton.ipo-ClassificationHeader_MarketsButton-transparent'
""" BET 365 - Odds-page """
# Collect all the odds from the redirection page.
main.py:
""" Run program from here """
from str_elements import *
from cls_scraper import *
from browser.path import *
import time
if __name__ == '__main__':
    print("Welcome \n")

    # Open website
    options = webdriver.ChromeOptions()
    driver = webdriver.Chrome(driver_path, chrome_options=options)
    driver.get('https://www.bet365.dk/#/HO/')

    """ Click relevant elements """
    print("Bet365: Pressing buttons ...")
    obj = Scraper(driver)
    obj.element_css(enterPage).click()   # Enters the bet365 main page
    obj.element_css(inPlay).click()      # Presses the in-play tab
    obj.element_css(langTab).click()     # Opens the language menu
    obj.element_css(pickEng).click()     # Chooses English
    obj.element_css(overview).click()    # Shows all live games
    obj.element_css(allMarkets).click()  # Better overview

    print("Bet365: Collecting game data ...")
    # All live games
    liveContainer = obj.find_elements(games)  # Contains a list of running games
    for game in liveContainer:
        getScoreH = game.find_element_by_css_selector(scoreH).text
        getScoreA = game.find_element_by_css_selector(scoreA).text
        getTeamH = game.find_element_by_css_selector(teamH).text
        getTeamA = game.find_element_by_css_selector(teamA).text
        getTime = game.find_element_by_css_selector(gameTime).text
        print("Score: ", getScoreH, "-", getScoreA)
        print("GameTime:", getTime)
        print("HomeTeam:", getTeamH)
        print("AwayTeam:", getTeamA)
        print("")

        ## HERE IT BREAKS!:
        ## Redirects to a game's detailed odds page
        # game.find_element_by_css_selector(oddsBtn).click()
        # time.sleep(5)
        ## Go back and click the next game's details.
        # obj.find_element(overview).click()
        # time.sleep(5)
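For reference, one common way around a StaleElementReferenceException in this situation is to avoid holding WebElement references across a navigation: iterate by index and re-locate the games list on every pass after returning to the overview. A minimal sketch of that idea, reusing the selectors above; it assumes the number and order of live games stays stable between passes (on a live-odds page that can shift, so a try/except around each pass is a sensible extra guard):

liveGames = obj.find_elements(games)
for i in range(len(liveGames)):
    # Re-locate the list on each pass: references collected before
    # navigating to a detail page and back are stale.
    game = obj.find_elements(games)[i]
    print("Game:", game.find_element_by_css_selector(teamH).text,
          "-", game.find_element_by_css_selector(teamA).text)
    # Open the detail page, scrape what you need, then go back.
    game.find_element_by_css_selector(oddsBtn).click()
    time.sleep(5)
    obj.find_element(overview).click()
    time.sleep(5)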
Related
Using Selenium to interact with authors on Medium.com
I am trying to target a popup element with Selenium that appears when text is double-clicked. Once this is detected and clicked, it opens a message box to the right. My end goal is to insert (send_keys) text into this text box, yet it is proving quite difficult.
[image: the dynamic element; the element on the far right is the button to open the chat box]
[image: the text box]
WHAT I HAVE TRIED:
import time
import random

from icecream import ic  # ic() debug-prints the exceptions below
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def interact_with_author(message):
    # variables
    target_class = "meteredContent"
    body_css = "body"
    header_xpath = "/html/head"
    second_article_css = "#root > div > div.s.n.t > div.ah.ay > div > div.n.p > div > div:nth-child(2)"  # the second article on the page
    first_par = "#\\39 993"   # CSS-escaped ids (backslashes doubled so Python keeps them literal)
    second_par = "#\\35 f50"
    first_par_css = "#root > div > div.s > article > div > section > div > div > p"
    first_par_class = "ht hu dj hv b ei hw hx hy el hz ia ib ic id ie if ig ih ii ij ik il im in io db eg"
    wait_time = 5  # seconds to wait when sleep is called with the wait_time variable

    # code (driver is assumed to be created elsewhere at module level)
    text_box = driver.find_element_by_css_selector('body > div:nth-child(47) > div > div > div > div > div')
    action = ActionChains(driver)
    listing = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.TAG_NAME, "a")))
    articles = driver.find_elements_by_tag_name("a")
    an_article = driver.find_element_by_css_selector(second_article_css)
    an_article.click()
    time.sleep(wait_time)  # todo: change to sleep four seconds after the article is fully loaded
    listing = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.TAG_NAME, "p")))
    try:
        paragraphs = driver.find_elements_by_tag_name('p')
        driver.execute_script("document.body.style.zoom='250%'")
        try:
            first_par = ''
            for i in range(1, len(paragraphs)):
                first_par_commentable = None
                try:
                    first_par_commentable = driver.find_element_by_xpath(f"/html/body/div[1]/div/div[3]/article/div/section/div/div/p[{i}]")
                    driver.execute_script("document.body.style.zoom='200%'")
                except Exception as e:
                    ic(e)
                if first_par_commentable != None:
                    break
        except Exception as f:
            ic(f)
        try:
            first_par_commentable.click()
            action.double_click(first_par_commentable).perform()
            time.sleep(random.randint(1, 3))
        except Exception as e:
            ic(e)
    except Exception as e:
        ic(e)
If anyone knows how to access this element quickly and in a scalable way, it would be appreciated.
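One generic pattern for popups like this is to perform the double-click with ActionChains and then explicitly wait for the popup button and the message box, instead of sleeping for a fixed time. A sketch of that pattern; the two selectors are hypothetical placeholders, since Medium's generated class names change:

from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

POPUP_BUTTON = "div[role='button']"  # hypothetical: the highlight-menu button
MESSAGE_BOX = "div[role='textbox']"  # hypothetical: the chat/message box

def double_click_and_message(driver, paragraph, message):
    # Double-click the paragraph to trigger the popup menu.
    ActionChains(driver).double_click(paragraph).perform()
    # Wait for the popup button rather than sleeping a fixed time.
    button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, POPUP_BUTTON)))
    button.click()
    # Wait for the message box that opens to the right, then type.
    box = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, MESSAGE_BOX)))
    box.send_keys(message)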
I have 25 links like this:
driver.find_element_by_css_selector('.v-table-table > tbody > tr:nth-child(i) > td:nth-child(4) > div > div > a').click()
and I try to click them all with:
for i in range(1, 25):
    link = driver.find_element_by_css_selector('.v-table-table > tbody > tr:nth-child(i) > td:nth-child(4) > div > div > a')
    link.click()
    print(link)
    time.sleep(1)
So I think something is wrong in my code, because I get an error =)
Traceback (most recent call last):
File "/Users/admin/Desktop/python_1pk/get_response.py", line 29, in <module>
driver.find_element_by_css_selector('.v-table-table > tbody > tr:nth-child(i) > td:nth-child(4) > div > div > a').click()
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 598, in find_element_by_css_selector
return self.find_element(by=By.CSS_SELECTOR, value=css_selector)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 976, in find_element
return self.execute(Command.FIND_ELEMENT, {
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.InvalidSelectorException: Message: invalid selector: An invalid or illegal selector was specified
(Session info: chrome=89.0.4389.82)
I assume there's an issue with your CSS selector.
You are trying to write it as '.v-table-table > tbody > tr:nth-child(i) > td:nth-child(4) > div > div > a', but in this notation the i variable is not substituted with its value.
Try using an f-string:
selector = f".v-table-table > tbody > tr:nth-child({i}) > td:nth-child(4) > div > div > a"
Note the curly braces around i - it's literally "place the value of the i variable here"
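For completeness, here is the loop from the question with the f-string applied (a sketch; note that range(1, 26) is needed to hit all 25 rows, since range's upper bound is exclusive):

import time

for i in range(1, 26):  # nth-child is 1-based; range(1, 26) covers all 25 rows
    selector = f".v-table-table > tbody > tr:nth-child({i}) > td:nth-child(4) > div > div > a"
    link = driver.find_element_by_css_selector(selector)
    link.click()
    print(link)
    time.sleep(1)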
As I don't know what your page looks like, I'll assume it is like this Stack Overflow page: https://stackoverflow.com/questions/tagged/python?tab=newest&page=2&pagesize=15.
I tried to click on every link. To do so I used the full XPath (you can probably use a normal one). In the XPath you can see where the article number sits, so you can create a for loop and click on every article. Here is the code I tried; I hope it will help you:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

driver = webdriver.Chrome("Your path to the chromedriver")
url = "https://stackoverflow.com/questions/tagged/python?tab=newest&page=2&pagesize=15"
driver.get(url)
time.sleep(5)

# "/html/body/div[3]/div[2]/div[1]/div[5]/div[1]/div/div[2]/h3/a"   xpath of the first title on the page
# "/html/body/div[3]/div[2]/div[1]/div[5]/div[15]/div/div[2]/h3/a"  xpath of the last title on the page
for k in range(1, 16):
    driver.find_element_by_xpath("/html/body/div[3]/div[2]/div[1]/div[5]/div[" + str(k) + "]/div/div[2]/h3/a").click()
    print("clicked")
    time.sleep(2)
    driver.get(url)
from time import sleep
from webbrowser import Chrome
import selenium
from bs4 import BeautifulSoup as bsoup
import pandas as pd
from selenium import webdriver
class FindByXpathCss():
    def test(self):
        baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
        driver = webdriver.Chrome("F:\\Chrome-webdriver\\chromedriver")
        driver.maximize_window()
        driver.get(baseUrl)
Here I need to click on one button (Full review) to view the full review text.
        fullReviewbtn = driver.find_element_by_css_selector('#fcxH9b > div.WpDbMd > c-wiz > div > div.ZfcPIb > div > div.JNury.Ekdcne > div > div > div.W4P4ne > div:nth-child(2) > div > div:nth-child(2) > div > div.d15Mdf.bAhLNe > div.UD7Dzf > span:nth-child(1) > div > button').click()
        sleep(1)
Here we are reading that full review text using an XPath, but I wish to read all the other reviews of the app, around 1200 reviews for this app alone. I wish to know how I can iterate over them using a for loop here.
        elementByXpath = driver.find_element_by_xpath('//*[@id="fcxH9b"]/div[4]/c-wiz/div/div[2]/div/div[1]/div/div/div[1]/div[2]/div/div[2]/div/div[2]/div[2]').text
        if elementByXpath is not None:
            print("We found an element using Xpath")
            # Review = elementByXpath.get_attribute("Review")
            print(elementByXpath)
        driver.close()

ff = FindByXpathCss()
ff.test()
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
class FindByXpathCss():
    driver = webdriver.Chrome(executable_path=r"C:\New folder\chromedriver.exe")
    driver.maximize_window()
    baseUrl = "https://play.google.com/store/apps/details?id=com.delta.mobile.android&hl=en_US&showAllReviews=true"
    driver.get(baseUrl)

    # Scroll down to force the page to load more reviews.
    scrolls = 15
    while True:
        scrolls -= 1
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(3)
        if scrolls < 0:
            break

    elemtn = WebDriverWait(driver, 30).until(
        EC.element_to_be_clickable((By.XPATH, "//span[contains(@class,'RveJvd snByac')]")))
    elemtn.click()

    scrolls = 5
    while True:
        scrolls -= 1
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
        time.sleep(3)
        if scrolls < 0:
            break

    elemtn = WebDriverWait(driver, 30).until(
        EC.element_to_be_clickable((By.XPATH, "//span[contains(@class,'RveJvd snByac')]")))
    elemtn.click()

    reviewText = WebDriverWait(driver, 30).until(
        EC.presence_of_all_elements_located((By.XPATH, "//*[@class='UD7Dzf']")))
    # reviewText = driver.find_elements_by_xpath("//*[@class='UD7Dzf']")
    for textreview in reviewText:
        print(textreview.text)
Why is it that when I add time.sleep(2) I get my desired output, but if I instead wait for a specific XPath it gives fewer results?
Output with time.sleep(2) (also desired):
Adelaide Utd
Tottenham
Dundee Fc
...
Count: 145 names
With time.sleep removed:
Adelaide Utd
Tottenham
Dundee Fc
...
Count: 119 names
I have added:
clickMe = wait(driver, 13).until(EC.element_to_be_clickable((By.CSS_SELECTOR, ("#page-container > div:nth-child(4) > div > div.ubet-sports-section-page > div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div.page-title-new > h1"))))
As this element is present on all pages.
That is significantly fewer. How can I get around this issue?
Script:
import csv
import os
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import WebDriverWait as wait

driver = webdriver.Chrome()
driver.set_window_size(1024, 600)
driver.maximize_window()
driver.get('https://ubet.com/sports/soccer')

clickMe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//select[./option="Soccer"]/option')))
options = driver.find_elements_by_xpath('//select[./option="Soccer"]/option')
indexes = [index for index in range(len(options))]
for index in indexes:
    try:
        try:
            zz = wait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, '(//select/optgroup/option)[%s]' % str(index + 1))))
            zz.click()
        except StaleElementReferenceException:
            pass
        clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#page-container > div:nth-child(4) > div > div.ubet-sports-section-page > div > div:nth-child(2) > div > div > div:nth-child(1) > div > div > div.page-title-new > h1")))
        langs0 = driver.find_elements_by_css_selector(
            "div > div > div > div > div > div > div > div > div.row.collapse > div > div > div:nth-child(2) > div > div > div > div > div > div.row.small-collapse.medium-collapse > div:nth-child(1) > div > div > div > div.lbl-offer > span")
        langs0_text = []
        for lang in langs0:
            try:
                langs0_text.append(lang.text)
            except StaleElementReferenceException:
                pass
        directory = 'C:\\A.csv'
        with open(directory, 'a', newline='', encoding="utf-8") as outfile:
            writer = csv.writer(outfile)
            for row in zip(langs0_text):
                writer.writerow(row)
    except StaleElementReferenceException:
        pass
If you cannot access the page, you may need a VPN.
Update:
Perhaps that element loads before the others, so I changed the wait to target the scraped data instead (not all pages have data to be scraped).
Add:
try:
    clickMe = wait(driver, 13).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div > div > div > div > div > div > div > div > div.row.collapse > div > div > div:nth-child(2) > div > div > div > div > div > div.row.small-collapse.medium-collapse > div:nth-child(3) > div > div > div > div.lbl-offer > span")))
except TimeoutException as ex:
    pass
The same issue is still present.
Manual steps:
1. Load driver.get('https://ubet.com/sports/soccer')
2. Click the drop-down (//select/optgroup/option)
3. Wait for the page elements so they can be scraped
4. Scrape:
div > div > div > div > div > div > div > div > div.row.collapse > div > div > div:nth-child(2) > div > div > div > div > div > div.row.small-collapse.medium-collapse > div:nth-child(1) > div > div > div > div.lbl-offer > span
5. Repeat the loop.
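Before the Angular-specific answer below, one generic workaround for "the wait fires too early" is to wait until the number of matched result elements stops changing between polls, instead of waiting on a single unrelated header. A sketch of such a custom expected condition; the class name and the shortened selector in the usage comment are my own, not from the site:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

class CountStabilizes:
    """Truthy once the locator matches the same non-zero count on two consecutive polls."""
    def __init__(self, locator):
        self.locator = locator
        self.last_count = -1

    def __call__(self, driver):
        count = len(driver.find_elements(*self.locator))
        if count > 0 and count == self.last_count:
            return True
        self.last_count = count
        return False

# Usage (hypothetical shortened selector for the offer labels):
# WebDriverWait(driver, 13, poll_frequency=1).until(
#     CountStabilizes((By.CSS_SELECTOR, "div.lbl-offer > span")))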
The website is built on AngularJS, so your best bet would be to wait until Angular has finished processing all AJAX requests (I won't go into the underlying mechanics, but there are plenty of materials on that topic throughout the web). For this, I usually define a custom expected condition to check while waiting:
class NgReady:
    js = ('return (window.angular !== undefined) && '
          '(angular.element(document).injector() !== undefined) && '
          '(angular.element(document).injector().get("$http").pendingRequests.length === 0)')

    def __call__(self, driver):
        return driver.execute_script(self.js)

# NgReady does not have any internal state, so one instance
# can be reused for waiting multiple times
ng_ready = NgReady()
Now use it to wait after zz.click():
zz.click()
wait(driver, 10).until(ng_ready)
Tests
Your original code, unmodified (without sleeping or waiting with ng_ready):
$ python so-47954604.py && wc -l out.csv && rm out.csv
86 out.csv
Using time.sleep(10) after zz.click():
$ python so-47954604.py && wc -l out.csv && rm out.csv
101 out.csv
Same result when using wait(driver, 10).until(ng_ready) after zz.click():
$ python so-47954604.py && wc -l out.csv && rm out.csv
101 out.csv
Credits
NgReady is not my invention, I just ported it to Python from the expected condition implemented in Java I found here, so all credits go to the author of that answer.
@hoefling's idea is absolutely the correct one, but here is an addition to the "wait for Angular" part.
The logic inside NgReady only checks that angular is defined and that no pending requests are left to be processed. Even though it works for this website, it's not a definitive answer to the question of whether Angular is ready to work with.
If we look at what Protractor, the Angular end-to-end testing framework, does to "sync" with Angular, it uses the "Testability" API built into Angular.
There is also the pytractor package, which extends selenium webdriver instances with a WebDriverMixin that keeps the driver and Angular in sync automatically on every interaction.
You can either use pytractor directly (though it is abandoned as a package), or we can try to apply the ideas implemented there in order to always keep our webdriver synced with Angular. For that, let's create this waitForAngular.js script (we'll use only the Angular 1 and 2 support logic; we can always extend it with the relevant Protractor client-side script):
try { return (function (rootSelector, callback) {
  var el = document.querySelector(rootSelector);
  try {
    if (!window.angular) {
      throw new Error('angular could not be found on the window');
    }
    if (angular.getTestability) {
      angular.getTestability(el).whenStable(callback);
    } else {
      if (!angular.element(el).injector()) {
        throw new Error('root element (' + rootSelector + ') has no injector.' +
          ' this may mean it is not inside ng-app.');
      }
      angular.element(el).injector().get('$browser').
        notifyWhenNoOutstandingRequests(callback);
    }
  } catch (err) {
    callback(err.message);
  }
}).apply(this, arguments); }
catch(e) { throw (e instanceof Error) ? e : new Error(e); }
Then, let's inherit from webdriver.Chrome and patch the execute() method - so that every time there is an interaction, we additionally check if Angular is ready before the interaction:
import csv
from selenium import webdriver
from selenium.webdriver.remote.command import Command
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
COMMANDS_NEEDING_WAIT = [
    Command.CLICK_ELEMENT,
    Command.SEND_KEYS_TO_ELEMENT,
    Command.GET_ELEMENT_TAG_NAME,
    Command.GET_ELEMENT_VALUE_OF_CSS_PROPERTY,
    Command.GET_ELEMENT_ATTRIBUTE,
    Command.GET_ELEMENT_TEXT,
    Command.GET_ELEMENT_SIZE,
    Command.GET_ELEMENT_LOCATION,
    Command.IS_ELEMENT_ENABLED,
    Command.IS_ELEMENT_SELECTED,
    Command.IS_ELEMENT_DISPLAYED,
    Command.SUBMIT_ELEMENT,
    Command.CLEAR_ELEMENT,
]

class ChromeWithAngular(webdriver.Chrome):
    def __init__(self, root_element, *args, **kwargs):
        self.root_element = root_element
        with open("waitForAngular.js") as f:
            self.script = f.read()
        super(ChromeWithAngular, self).__init__(*args, **kwargs)

    def wait_for_angular(self):
        self.execute_async_script(self.script, self.root_element)

    def execute(self, driver_command, params=None):
        if driver_command in COMMANDS_NEEDING_WAIT:
            self.wait_for_angular()
        return super(ChromeWithAngular, self).execute(driver_command, params=params)

driver = ChromeWithAngular(root_element='body')
# the rest of the code as is with what you had
Again, this is heavily inspired by the pytractor and protractor projects.
Is there a way to click elements outside the visible area in Selenium?
I am trying to click all the elements on the slider so I can scrape each page. However, the job gives me this error:
Traceback (most recent call last):
File "C:/Users/Bain3/PycharmProjects/untitled4/TOPBETTA.py", line 1329, in <module>
clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '("//*[@class="name"]//span")[%s]' % str(index + 1))))
File "C:\Users\Bain3\Anaconda3\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
The href appears to be the default page, https://www.topbetta.com.au/sports/, as seen in the image, so extracting the href and navigating to each page individually is not possible.
I was able to scrape this with WinAutomation. Any ideas on how I can do this with Selenium?
driver.execute_script('document.getElementByxpath("//*[@class="name"]//span").style.visibility = "visible";')
The above code unfortunately did not help and gave:
Traceback (most recent call last):
File "C:/Users/Bain3/PycharmProjects/untitled4/TOPBETTA.py", line 1329, in <module>
driver.execute_script('document.getElementByxpath("//*[@class="name"]//span").style.visibility = "visible";')
File "C:\Users\Bain3\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 532, in execute_script
'args': converted_args})['value']
File "C:\Users\Bain3\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 297, in execute
self.error_handler.check_response(response)
File "C:\Users\Bain3\Anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 194, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: Runtime.evaluate threw exception: SyntaxError: missing ) after argument list
(Session info: chrome=61.0.3163.100)
(Driver info: chromedriver=2.31.488763 (092de99f48a300323ecf8c2a4e2e7cab51de5ba8),platform=Windows NT 10.0.16299 x86_64)
Code:
try:
    os.remove('vtg121.csv')
except OSError:
    pass

driver.get('https://www.topbetta.com.au/sports/football/')
#SCROLL_PAUSE_TIME = 0.5

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

#clickMe = wait(driver, 3).until(EC.element_to_be_clickable((By.XPATH, ('//*[@id="TopPromotionBetNow"]'))))
#if driver.find_element_by_css_selector('#TopPromotionBetNow'):
#    driver.find_element_by_css_selector('#TopPromotionBetNow').click()
#last_height = driver.execute_script("return document.body.scrollHeight")
#while True:
#    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
#    time.sleep(SCROLL_PAUSE_TIME)
#    new_height = driver.execute_script("return document.body.scrollHeight")
#    if new_height == last_height:
#        break
#    last_height = new_height

time.sleep(1)
#clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, ('//div[text()="Soccer"][contains(@class, "wn-Classification")]'))))
#clickMe.click()
#time.sleep(0)

options = driver.find_elements_by_xpath('//*[@class="name"]//span')
indexes = [index for index in range(len(options))]
shuffle(indexes)
for index in indexes:
    time.sleep(0)
    clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '("//*[@class="name"]//span")[%s]' % str(index + 1))))
    clickMe.click()
    time.sleep(0)

    # Team
    clickMe = wait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#js_body-region > div > div:nth-child(1) > div.sports-body-region > div.js_event-list-region > div > div.js_events-container.events-container > div > div.js_event-region.event-region > div > a > div > div.team-container.home > div")))
    langs3 = driver.find_elements_by_css_selector("#js_body-region > div > div:nth-child(1) > div.sports-body-region > div.js_event-list-region > div > div.js_events-container.events-container > div > div.js_event-region.event-region > div > a > div > div.team-container.home > div")
    langs3_text = []
    for lang in langs3:
        #print(lang.text)
        langs3_text.append(lang.text)
    time.sleep(0)

    # Team odds
    langs = driver.find_elements_by_css_selector("#js_body-region > div > div:nth-child(1) > div.sports-body-region > div.js_event-list-region > div > div.js_events-container.events-container > div > div.js_event-region.event-region > div > div > div > div.js_teams-container.market-items > div.head-to-head-item.home > div > div > button.js_price-button.price")
    langs_text = []
    for lang in langs:
        #print(lang.text)
        langs_text.append(lang.text)
    time.sleep(0)

    # Draw odds
    #langs1 = driver.find_elements_by_xpath("//ul[@class='runners']//li[2]")
    langs1 = driver.find_elements_by_css_selector("#js_body-region > div > div:nth-child(1) > div.sports-body-region > div.js_event-list-region > div > div.js_events-container.events-container > div > div.js_event-region.event-region > div > a > div > div.team-container.away > div")
    langs1_text = []
    for lang in langs1:
        #print(lang.text)
        langs1_text.append(lang.text)
    time.sleep(0)

    # HREF
    #langs2 = driver.find_elements_by_xpath("//ul[@class='runners']//li[1]")
    url1 = driver.current_url
    print("NEW LINE BREAK")

    import sys
    import io

    with open('vtg121.csv', 'a', newline='', encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        for row in zip(langs_text, langs1_text, langs3_text):
            writer.writerow(row + (url1,))
            print(row + (url1,))
An element is not clickable when it is not visible in Selenium. You will have to use the scroll buttons to click on all the elements.
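As a general pattern, you can also scroll an off-screen element into view with JavaScript before clicking it. A small sketch using only standard Selenium calls (the helper name is mine):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def scroll_and_click(driver, xpath, timeout=10):
    # Locate the element, scroll it into the viewport, then click
    # once Selenium reports it clickable.
    element = WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.XPATH, xpath)))
    driver.execute_script("arguments[0].scrollIntoView(true);", element)
    WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.XPATH, xpath))).click()

The site-specific solution below does something similar via location_once_scrolled_into_view.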
Try the solution below to scrape the required pages:
url = "https://www.topbetta.com.au/sports/football/"
driver.get(url)

counter = 0
for link in range(len(wait(driver, 15).until(EC.presence_of_all_elements_located((By.XPATH, '//a[@href="/sports" and ./div[@class="name"]]'))))):
    wait(driver, 15).until_not(EC.visibility_of_element_located((By.CLASS_NAME, "mask")))
    link = wait(driver, 15).until(EC.presence_of_all_elements_located((By.XPATH, '//a[@href="/sports" and ./div[@class="name"]]')))[counter]
    link.location_once_scrolled_into_view
    link = wait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '(//a[@href="/sports" and ./div[@class="name"]])[%s]' % str(counter + 1))))
    wait(driver, 15).until_not(EC.visibility_of_element_located((By.CLASS_NAME, "mask")))
    link.click()
    print(driver.current_url)
    wait(driver, 10).until(EC.staleness_of(driver.find_element(By.XPATH, '//div[@class="competition-events-module"]')))
    counter += 1
    driver.get(url)
Just replace print(driver.current_url) with the code you want to apply to each page.