Trying to get Selenium to scrape table and click button - python

I hacked together the code below to try to scrape data from an HTML table into a data frame and then click a button to move to the next page, but it's giving me an error that says 'invalid selector'.
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
import time
from time import sleep
import pandas as pd
browser = webdriver.Chrome("C:/Utility/chromedriver.exe")
wait = WebDriverWait(browser, 10)
url = 'https://healthdata.gov/dataset/Hospital-Detail-Map/tagw-nk32'
browser.get(url)
for x in range(1, 5950, 13):
time.sleep(3) # wait page open complete
df = pd.read_html(browser.find_element_by_xpath("socrata-table frozen-columns").get_attribute('outerHTML'))[0]
submit_button = browser.find_elements_by_xpath('pager-button-next')[0]
submit_button.click()
I see the table, but I can't reference it.
Any idea what's wrong here?

I've managed to find button with find_elements_by_css_selector
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
import time
from time import sleep
import pandas as pd
# Scrape the paged table into one DataFrame, clicking "next" after every page.
browser = webdriver.Chrome("C:/Utility/chromedriver.exe")
wait = WebDriverWait(browser, 10)
url = 'https://healthdata.gov/dataset/Hospital-Detail-Map/tagw-nk32'
browser.get(url)

# One DataFrame per page; they are concatenated after the loop so that earlier
# pages are not thrown away when the next one is read.
frames = []
for x in range(1, 5950, 13):
    time.sleep(3)  # wait page open complete
    # "socrata-table frozen-columns" is not an XPath expression (that is what
    # raises "invalid selector"). It looks like the element's two class names,
    # so it is matched with a CSS class selector instead.
    # NOTE(review): confirm the class names against the page's DOM.
    table = browser.find_element_by_css_selector('.socrata-table.frozen-columns')
    frames.append(pd.read_html(table.get_attribute('outerHTML'))[0])
    # Index 1 is the match the original answer found to be the working button.
    submit_button = browser.find_elements_by_css_selector('button.pager-button-next')[1]
    submit_button.click()
df = pd.concat(frames, ignore_index=True)
Sometimes pagination hangs, and submit_button.click() ends with an error
selenium.common.exceptions.ElementClickInterceptedException:
Message: element click intercepted:
Element <button class="pager-button-next">...</button>
is not clickable at point (182, 637).
Other element would receive the click: <span class="site-name">...</span>
So consider increasing the timeout. For example, you can use this approach:
def click_timeout(element, timeout: int = 60):
    """Click *element*, retrying about once per second until the click succeeds.

    Args:
        element: Object exposing a ``click()`` method (a Selenium WebElement).
        timeout: Maximum number of retries; with a one-second pause between
            attempts this is roughly the number of seconds to keep trying.

    Raises:
        WebDriverException: If the element still cannot be clicked after
            ``timeout`` retries (raised by the final attempt).
    """
    # Imported here so the helper is self-contained; the surrounding snippet
    # never imports this name.
    from selenium.common.exceptions import WebDriverException

    for _ in range(timeout):
        try:
            element.click()
        except WebDriverException:
            # Not clickable yet (e.g. another element intercepts the click):
            # pause briefly and try again.
            time.sleep(1)
        else:
            # Stop as soon as one click goes through, so the element is
            # clicked exactly once.
            return
    # Last attempt: let the exception propagate so the caller sees the failure.
    element.click()
This way, the element is clicked as soon as it is ready.

Related

Selenium python: Difficulty Switching to frame on this page

I am finding it difficult to switch to iframe and click on O/U on this page. I need help with this guys!
Here is my code below;
from random import *
import random
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
driver.get("https://www.sportybet.com/ke/sporty-instant-virtuals/")
driver.maximize_window()
instantplay= driver.find_element(By.XPATH, "//span[text()='Instant Virtuals']")
instantplay.click()
# find the frame
frame2 = driver.find_element(By.XPATH, "//iframe[#id='instantwin-sport']")
time.sleep(3)
driver.switch_to.frame(frame2)
time.sleep(3)
# Click O/U
driver.find_element(By.XPATH, "//li[text()[normalize-space()='O/U']]")
driver.quit()
You never click on the element. Add .click() after finding the element.
You can also remove the sleep before the iframe and put an implicit wait to wait on the O/U button
# Open the page, enter the "Instant Virtuals" iframe and click the O/U tab.
driver.get("https://www.sportybet.com/ke/sporty-instant-virtuals/")
driver.maximize_window()
# The implicit wait applies to every later find_element call, so set it before
# the first lookup: with the sleeps removed, the iframe lookup needs it as much
# as the O/U button does.
driver.implicitly_wait(10)
instantplay = driver.find_element(By.XPATH, "//span[text()='Instant Virtuals']")
instantplay.click()
# find the frame (XPath attribute tests are written @id, not #id)
frame2 = driver.find_element(By.XPATH, "//iframe[@id='instantwin-sport']")
driver.switch_to.frame(frame2)
# Click O/U
driver.find_element(By.XPATH, "//li[text()[normalize-space()='O/U']]").click()

element not clickable from dropmenu

Using Selenium, I am trying to choose a specific value in a drop-down menu,
but I always get an error.
%reset -sf
site = 'https://www.mytek.tn/informatique/ordinateurs-portables/pc-portable.html'
driver.get(site)
sleep(1)
page_cat = requests.get(site)
tree_cat = html.fromstring(driver.page_source)
btn_all = tree_cat.xpath(".//option[#value='all']")
if len(btn_all) == 0:
print("btn all dont exist")
else:
print('choice all exist')
dropdown = Select(driver.find_element_by_id('limiter'))
dropdown.select_by_visible_text('Tous')
#dropdown.select_by_value('all') # same error : ElementNotInteractableException
I've tried to see whether Selenium can read all the elements in the drop-down menu: yes.
print("All selected options using ActionChains : \n")
for opt in dropdown.options:
print(opt.get_attribute('innerText'))
time.sleep(5)
always same error
ElementNotInteractableException: Message: element not interactable: Element is not currently visible and may not be manipulated
I'm going crazy
my imports:
#imports here
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import requests
import time
from time import sleep
from lxml import html
import logging as log
import pandas as pd
The drop-down menu you are trying to access appears at the bottom of the page, not inside the initially visible screen.
To access it with Selenium, you first need to scroll to that element.
Also, there are two selects there with similar locators and you need the second of them, so you should use a corrected locator:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
# Scroll the second "limiter" <select> into view, then choose the "Tous" option.
actions = ActionChains(driver)
wait = WebDriverWait(driver, 20)
# Two selects share id="limiter"; [last()] picks the later one.
# XPath attribute tests are written @id, not #id.
limiter_xpath = "(//select[@id='limiter'])[last()]"
dropdown = wait.until(EC.presence_of_element_located((By.XPATH, limiter_xpath)))
time.sleep(1)
# Move to the element so it is scrolled into the visible part of the page.
actions.move_to_element(dropdown).perform()
time.sleep(0.5)
# Look the element up again after scrolling and wrap it for option selection.
dropdown = Select(driver.find_element_by_xpath(limiter_xpath))
dropdown.select_by_visible_text('Tous')
I hope this will work for you.

Why i can't click this button on selenium webdriver python?

I cannot click some buttons in the router interface. I was only able to click through using pyautogui. But this method is not functional. How can I click this button on Selenium? I will use this code to reset my ip address.
This is the css code of the place I want to click:
Html Data:
https://mega.nz/file/2XJyEbCR#xBcEtzYh8QFLWTmSfAqll2V-p-SHiaw4wEz1RAWtso0
I tried all of these methods, but none of them worked.
try:
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#editBtn'))).send_keys("\n")
except:
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#editBtn'))).send_keys(Keys.ENTER)
try:
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#editBtn')))[0].send_keys("\n")
except:
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#editBtn')))[0].send_keys(Keys.ENTER)
try:
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#editBtn')))[0].click()
except:
WebDriverWait(driver,10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,'#editBtn'))).click()
try this
link = driver.find_element_by_link_text('')
link.click()
you want to click link,
maybe this example helps you.
I solved it. I should have used frames.
#Selenium
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.actions.interaction import KEY
#Beautiful Soup
from bs4 import BeautifulSoup
import lxml.html
import time
import pyautogui
def router_reset():
    """Drive the router's web UI at 192.168.1.1 to reset the connection.

    Steps, in order: log in, skip the screen shown after login, open the
    network menu and its broadband entry, click ``#editBtn`` inside the
    ``mainFrame`` iframe, confirm the dialog, wait 60 seconds, then log out
    and confirm again. The browser is always closed on exit, even if a step
    fails.

    Raises:
        selenium.common.exceptions.TimeoutException: If any awaited element
            does not become clickable within 10 seconds.
    """
    print("Modem resetleniyor")
    driver = webdriver.Chrome('C:/Anaconda3/chromedriver.exe')
    try:
        driver.get('http://192.168.1.1/login.cgi')
        username = driver.find_element_by_id('AuthName')
        password = driver.find_element_by_id('AuthPassword')
        # XPath attribute tests are written @id, not #id.
        login = driver.find_element_by_xpath("//*[@id='login']/fieldset/ul/li[6]/input")
        # NOTE(review): credentials are hard-coded; consider reading them from
        # configuration or the environment instead.
        username.send_keys("admin")
        password.send_keys("turktelekom")
        login.click()
        time.sleep(1)

        # "Skip" button on the screen that opens after login.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="login"]/fieldset/ul/li[3]/input[2]'))).click()

        # Source and target of the drag-and-drop are the same "network"
        # element, so one wait is enough.
        # NOTE(review): dragging an element onto itself looks like a way to
        # hover/activate the menu before clicking - confirm it is still needed.
        network = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="network"]')))
        # Create the object for Action Chains
        actions = ActionChains(driver)
        actions.drag_and_drop(network, network)
        # perform the operation on the element
        actions.click(network)
        actions.perform()

        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#network-broadband > a'))).click()
        time.sleep(2)

        # The edit button lives inside the "mainFrame" iframe; it cannot be
        # found from the top-level document.
        WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, '//*[@id="mainFrame"]')))
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn'))).click()
        # Leave the iframe again before looking for the dialog.
        driver.switch_to.default_content()
        time.sleep(5)
        # Click the second button of the dialog's button pane.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.ui-dialog.ui-widget.ui-widget-content.ui-corner-all > div.ui-dialog-buttonpane.ui-widget-content.ui-helper-clearfix > button:nth-child(2)'))).click()

        # Announce the wait before it starts rather than after it has elapsed.
        print("Modeme Reset Atıldı, 60sn Bekleme Süresi Başladı.")
        time.sleep(60)

        # Log out and confirm the logout dialog.
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="logoutName"]'))).click()
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.ui-dialog.ui-widget.ui-widget-content.ui-corner-all > div.ui-dialog-buttonpane.ui-widget-content.ui-helper-clearfix > button:nth-child(2)'))).click()
    finally:
        # Always release the browser and the chromedriver process.
        driver.quit()
router_reset()

Getting Dynamic Table Data With Selenium Python

So I am trying to parse this data from a dynamic table with selenium, it keeps getting the old data from page 1, I am trying to get gather pages 2's data, I've tried to search for other answers, but haven't found any, some say I need to add a wait period, and I did, however that didn't work.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Firefox()
browser.get('https://www.nyse.com/listings_directory/stock')
symbol_list=[]
table_data=browser.find_elements_by_xpath("//td");
def append_to_list(data):
    """Append the ``text`` of every item in *data* to the module-level ``symbol_list``."""
    symbol_list.extend(item.text for item in data)
append_to_list(table_data)
pages=browser.find_elements_by_xpath('//a[#href="#"]')
for page in pages:
if(page.get_attribute("rel")== "next"):
if(page.text=="NEXT ›"):
page.click()
browser.implicitly_wait(100)
for elem in browser.find_elements_by_xpath("//td"): //still fetchs the data from page 1
print(elem.text)
#print(symbol_list)
I modified your script as below.
You should retrieve each element inside the for loop; otherwise you will get a stale element reference exception.
Also, use WebDriverWait to wait for the elements to be visible before finding them.
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from time import sleep
# Collect the text of every table cell on every page of the listings directory.
browser = webdriver.Chrome()
browser.get('https://www.nyse.com/listings_directory/stock')
symbol_list = []
try:
    while True:
        try:
            # Wait for the current page's cells; only the count is used below.
            table_data = WebDriverWait(browser, 10).until(EC.visibility_of_all_elements_located((By.XPATH, "//table//td")))
            for i in range(1, len(table_data) + 1):
                # Look each cell up again by position (XPath indices are
                # 1-based) instead of reusing the waited-for elements, to
                # avoid stale element references.
                td_text = browser.find_element_by_xpath(f"(//table//td)[{i}]").text
                print(td_text)
                symbol_list.append(td_text)
            # XPath attribute tests are written @href, not #href.
            next_page = WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH, '//a[@href="#" and contains(text(),"Next")]')))
            # Class attribute of the link's parent element (the <li>).
            next_clickable = next_page.find_element_by_xpath("..").get_attribute("class")  # li
            # Compare class tokens rather than the whole attribute, so
            # "disabled" is still detected when other classes are present.
            if 'disabled' in (next_clickable or '').split():
                break
            print("Go to next page ...")
            next_page.click()
            sleep(3)
        except Exception as e:
            # Script-level boundary: report the problem and stop paging.
            print(e)
            break
finally:
    # Close the browser however the loop ends.
    browser.quit()

Retrieve download link from URL

I'm trying to get the URL of a video, but every time it doesn't show in my output. I try request, urllib and even selenium, but it just doesn't show part of the code in my result, it's like it is blocked.
The url is https://unitplay.net/tt0089222, and here is my code:
from selenium import webdriver
browser=webdriver.Chrome('path/chromedriver.exe')
type(browser)
browser.get('https://unitplay.net/tt0089222')
elem = browser.page_source
print(elem)
browser.quit()
Here is the part it doesn't show and I want to get the src from it:
<div class="jw-media jw-reset"><video class="jw-video jw-reset" x-webkit-airplay="allow" webkit-playsinline="" playsinline="" preload="auto" jw-loaded="data" src="https://unitplay.net//file/others/DA6BB292BA130B6A825B62B96BD929F811EBF7BFEC748F8E2609004F5D96D0F5DD7025F4450289E31279E9F621883D048C869F15520DBE571D8FA35EBCCACD75" __idm_id__="64900097" jw-played=""></video></div>
You can wait for the element to appear using selenium.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Load the page, wait for the <video> element and print its src attribute.
browser = webdriver.Chrome('path/chromedriver.exe')
try:
    # Loading the page inside the try block ensures the browser is closed
    # even when navigation itself fails.
    browser.get('https://unitplay.net/tt0089222')
    # Wait up to 10 seconds for a <video> element to be present in the DOM.
    element = WebDriverWait(browser, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "video"))
    )
    print(element.get_attribute("src"))
finally:
    browser.quit()
This should tell Selenium to wait up to 10 seconds for a video element to appear and then print out its source.

Categories