Extract title but give me wrong output using selenium

Extract title but give me wrong output using selenium - python

import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
# Keep Chrome open after the script ends so you can see what happens in the
# real world; comment the next line out to let the browser close.
options.add_experimental_option("detach", True)
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
URL = 'https://advpalata.vrn.ru/registers/reestr_lawyers/'
driver.get(URL)
# Click the first entry of the letter filter. XPath attribute tests are
# written with "@" (e.g. @class), not "#".
title = driver.find_element("xpath", '//ul[@class="letter-filter"]//li[1]')
title.click()
# Collect the href of every name link before navigating to the detail pages.
page_links = [element.get_attribute('href') for element in driver.find_elements(By.XPATH, "//td[@class='name']//a")]
for link in page_links:
    driver.get(link)
    time.sleep(2)
    # Wait (up to 20 s) for the <h3> heading to be visible, then print its text.
    print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//h3"))).text)
driver.close()
I want to extract the name, but it is extracted in a different format. The page link is https://advpalata.vrn.ru/registers/reestr_lawyers/abdullaev_parviz_zairhan_ogly/ and the output I get looks like this:
\xd0\x90\xd0\xb1\xd0\xb0\xd0\xba\xd1\x83\xd0\xbc\xd0\xbe\xd0\xb2
but I want this output:
Абдуллаев Парвиз Заирхан оглы

The WebElements are dynamically loaded. So you need to wait for the elements/texts to completely load before you attempt to extract them. Moreover you don't need to explicitly encode to utf-8 as by default Python uses utf-8 encoding.
Solution
To print the name ideally you need to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following locator strategies:
Using TAG_NAME:
#_*_coding: utf-8_*_
# driver.execute("get", {'url': 'https://advpalata.vrn.ru/registers/reestr_lawyers/abdullaev_parviz_zairhan_ogly/'})
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.TAG_NAME, "h3"))).text)
Using CSS_SELECTOR:
#_*_coding: utf-8_*_
# driver.execute("get", {'url': 'https://advpalata.vrn.ru/registers/reestr_lawyers/abdullaev_parviz_zairhan_ogly/'})
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "h3"))).text)
Using XPATH:
#_*_coding: utf-8_*_
# driver.execute("get", {'url': 'https://advpalata.vrn.ru/registers/reestr_lawyers/abdullaev_parviz_zairhan_ogly/'})
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//h3"))).text)
Console Output:
Абдуллаев Парвиз Заирхан оглы
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Related

Issues scraping a dynamic table displaying available and booked time slots

I want to scrape the following website: https://padelbox.de/koeln-weiden/padelplatz-buchen. I want to scrape the planning tool every day and see which slots are booked and which are not. However, running the code below raises an error suggesting the values are not found. Can anyone help me with this?
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import requests
import time
website = 'https://padelbox.de/koeln-weiden/padelplatz-buchen'
path = '/Users/joeplamers/Downloads/chromedriver_mac_arm64/chromedriver'
# Initialize the webdriver
driver = webdriver.Chrome(path)
# Open the website
driver.get(website)
# Close the cookie pop-up and maximize the window. The XPath attribute test
# uses "@class", and the class token is "_brlbs-btn-accept-all" (no space).
all_matches_button = driver.find_element(By.XPATH, '//a[@class="_brlbs-btn _brlbs-btn-accept-all _brlbs-cursor"]')
all_matches_button.click()
driver.maximize_window()
# Wait up to 60 s for at least one element marked data-state="booked".
wait = WebDriverWait(driver, 60)
wait.until(ec.presence_of_element_located((By.CSS_SELECTOR,'[data-state="booked"]')))
booked_elements = driver.find_elements(By.CSS_SELECTOR,'[data-state="booked"]')
print(booked_elements)
# Close the browser
driver.quit()

The desired elements are within an <iframe> so you have to:
Induce WebDriverWait for the desired frame to be available and switch to it.
Induce WebDriverWait for the desired element to be visible.
You can use the following locator strategies:
# Open the booking page.
driver.get("https://padelbox.de/koeln-weiden/padelplatz-buchen")
# Wait until the cookie "accept all" link is clickable, then click it.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a._brlbs-btn-accept-all"))).click()
# Wait for the iframe whose src starts with the eversports widget URL and switch into it.
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe[src^='https://www.eversports.de/widget']")))
# Print how many visible <td> cells have data-state='booked' and a data-date attribute.
print(len(WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "td[data-state='booked'][data-date]")))))
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Console Output:
318
Update
Here's the complete code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
# Pass both switches in ONE call: add_experimental_option stores the value
# under its name, so a second "excludeSwitches" call replaces the first and
# silently drops "enable-automation".
options.add_experimental_option("excludeSwitches", ["enable-automation", "enable-logging"])
options.add_experimental_option('useAutomationExtension', False)
options.add_argument('--disable-blink-features=AutomationControlled')
s = Service('C:\\BrowserDrivers\\chromedriver.exe')
driver = webdriver.Chrome(service=s, options=options)
try:
    driver.get("https://padelbox.de/koeln-weiden/padelplatz-buchen")
    # Accept the cookie banner once its link is clickable.
    WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a._brlbs-btn-accept-all"))).click()
    # The booking table is inside the eversports widget iframe; switch into it.
    WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe[src^='https://www.eversports.de/widget']")))
    # Print the number of visible booked cells.
    print(len(WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "td[data-state='booked'][data-date]")))))
finally:
    # Always shut the browser down, even if one of the waits times out.
    driver.quit()

Clicking Accept cookies button using Selenium

I am trying to automate some download of data and have an issue with accepting the cookies message.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from random import randint
import time
current_link = "https://platform.smapone.com/Portal/Account/Login?ReturnUrl=%2FPortal%2F"
driver = webdriver.Chrome(PATH)
driver.maximize_window()
driver.implicitly_wait(10)
driver.get(current_link)
#driver.switch_to.frame("cookieConsentIframe")
#driver.switch_to.frame(driver.find_element_by_name('cookieConsentIframe'))
try:
    # Switch into the cookie-consent iframe (XPath attribute tests use "@id").
    WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH,"//iframe[@id='cookieConsentIframe']")))
    print(1)
    driver.find_element(By.XPATH,'//button[@id="cookies-accept-all"]').click()
    #driver.find_element(By.XPATH,'//button[text()="Accept"]').click()
except:
    # NOTE: this bare except silently hides any failure of the wait or the click.
    pass
time.sleep(randint(5,8))
driver.quit()
The code runs through (prints also the 1) but never clicks the button. Any suggestions? Tried so many things already.

You also need to wait for the button to become clickable:
# Wait until the accept button is clickable, then click it ("@id" is the XPath attribute test).
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH,'//button[@id="cookies-accept-all"]'))).click()
EDIT Here is a full example (selenium/chromedriver setup is for linux, but you need to observe only the imports, and part after defining the browser):
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
import time as t
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
webdriver_service = Service("chromedriver/chromedriver") ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
url = 'https://platform.smapone.com/Portal/Account/Login?ReturnUrl=%2FPortal%2F'
browser.get(url)
# Switch into the cookie-consent iframe before looking for the button.
WebDriverWait(browser, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH,"//iframe[@id='cookieConsentIframe']")))
## sort out cookie button
try:
    WebDriverWait(browser, 10).until(EC.element_to_be_clickable((By.XPATH,'//button[@id="cookies-accept-all"]'))).click()
    print("accepted cookies")
except Exception:
    # Best effort: report and carry on if the button never becomes clickable.
    print('no cookie button')

The element Accept is within an <iframe> so you have to:
Induce WebDriverWait for the desired frame to be available and switch to it.
Induce WebDriverWait for the desired element to be clickable.
You can use either of the following locator strategies:
Using CSS_SELECTOR:
# Send the "get" command with the login URL through driver.execute.
driver.execute("get", {'url': 'https://platform.smapone.com/Portal/Account/Login?ReturnUrl=%2FPortal%2F'})
# Wait for the iframe with class "cookieConsent" and id "cookieConsentIframe", then switch into it.
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe.cookieConsent#cookieConsentIframe")))
# Wait for the span.btn-accept inside button#cookies-accept-all to be clickable, then click it.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button#cookies-accept-all span.btn-accept"))).click()
Using XPATH:
# Send the "get" command with the login URL through driver.execute.
driver.execute("get", {'url': 'https://platform.smapone.com/Portal/Account/Login?ReturnUrl=%2FPortal%2F'})
# XPath attribute tests use "@": match the iframe by class and id, then switch into it.
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH,"//iframe[@class='cookieConsent' and @id='cookieConsentIframe']")))
# Wait for the accept span inside the button to be clickable, then click it.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//button[@id='cookies-accept-all']//span[@class='btn-accept']"))).click()
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Browser Snapshot:

Selenium click()

"""
Web scraping the Wikipedia page
"""
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
# Start Chrome with a driver binary installed by webdriver_manager.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://en.wikipedia.org/wiki/Main_Page")
# Locate the link inside the element with id "articlecount".
num_articles1 = driver.find_element(By.CSS_SELECTOR, '#articlecount a')
print(num_articles1.text)
num_articles1.click()
# close() is called immediately after the click.
driver.close()
Question: num_articles1 returns the value, but why is the click() not working?
I can't understand why this is happening. What am I missing?

On my Windows 10 machine, using the latest Selenium, ChromeDriver and Chrome, the number of articles (i.e. 6,559,615) gets printed correctly and the click() is also performed without issue.
However, to click on the clickable element ideally you need to induce WebDriverWait for the element_to_be_clickable() and you can use either of the following locator strategies:
Using CSS_SELECTOR:
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#articlecount a"))).click()
Using XPATH:
# XPath attribute tests use "@id"; wait for the link to be clickable, then click it.
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@id='articlecount']/a"))).click()
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Getting text from a <pre> object with Selenium

I'm trying to get the text inside of <pre> tag and I have tried with get_attribute('text'), get_attribute('value'), .text(), .value(), get_attribute("innerHTML") but I keep failing:
Snapshot:
This is the code that i'm using:
import unittest
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome("chromedriver.exe")
driver.get("###")
# Click the element with id "login_admin", fill two inputs and press a button
# (presumably the login form -- the page itself is not shown).
elem=driver.find_element(By.ID, "login_admin").click()
elem=driver.find_element(By.XPATH, "/html/body/div[15]/div[2]/form/div[1]/input").send_keys("###")
elem=driver.find_element(By.XPATH, "/html/body/div[15]/div[2]/form/div[2]/input").send_keys("###")
elem=driver.find_element(By.XPATH, "/html/body/div[15]/div[3]/div/button[1]").click()
elem=driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/nav/div/div[1]/button/span[3]').click()
time.sleep(2)
# Wait for the nav link to be present before clicking it.
elem = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[1]/div[1]/nav/div/div[2]/ul/li/a')))
time.sleep(2)
elem=driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/nav/div/div[2]/ul/li/a').click()
time.sleep(2)
elem=driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div[2]/div/span[1]/input[2]').send_keys('###')
time.sleep(1)
elem=driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/div/div/div/div/div/div/div[2]/div/div/div/div/div/div/div[2]/div/span[2]/button').click()
# Wait for a visible <pre> with a text node inside div.output and print its
# innerHTML. XPath attribute tests use "@class".
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='output']/pre[text()]"))).get_attribute("innerHTML"))
And this is what it says when print:

To print the text within the <pre> tag you can use either of the following locator strategies:
Using css_selector and get_attribute("innerHTML"):
print(driver.find_element(By.CSS_SELECTOR, "div.output > pre").get_attribute("innerHTML"))
Using xpath and text attribute:
# XPath attribute tests use "@class"; select the <pre> whose content contains 'ContactUri'.
print(driver.find_element(By.XPATH, "//div[@class='output']/pre[contains(., 'ContactUri')]").text)
To extract the text ideally you need to induce WebDriverWait for the visibility_of_element_located() and you can use either of the following locator strategies:
Using CSS_SELECTOR and text attribute:
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.output > pre"))).text)
Using XPATH and get_attribute("innerHTML"):
# XPath attribute tests use "@class"; wait for the <pre> to be visible, then print its innerHTML.
print(WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[@class='output']/pre[contains(., 'ContactUri')]"))).get_attribute("innerHTML"))
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
You can find a relevant discussion in How to retrieve the text of a WebElement using Selenium - Python

Selenium web scraping iframe

I want to read the toner values on the web pages of the various printers in my office.
The problem is that the page is made up of several frames, and the one that shows the remaining toner is generated by JavaScript, so I can't read it even with Selenium.
This is my code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.expected_conditions import (
presence_of_element_located)
from selenium.webdriver.support.wait import WebDriverWait
def get_comment_count(driver, url):
    """Open *url*, switch into the first top-level frame and try to enter the
    element with id "contain", printing what was found.

    This is the asker's (non-working) attempt, reproduced with its
    indentation and XPath "@id" syntax restored.
    """
    driver.get(url)
    wait = WebDriverWait(driver, 3)
    e = driver.find_elements_by_xpath("/html/frameset/frame")
    driver.switch_to_frame(e[0])
    # find_elements_* returns a list, so toner_iframe is a list here.
    toner_iframe = driver.find_elements_by_xpath('//*[@id="contain"]')
    # iframe_url = toner_iframe.get_attribute('src')
    #driver.switch_to_frame(toner_iframe)
    driver.switch_to.frame(toner_iframe)
    print(toner_iframe)
# Placeholder address of the printer's web page.
url = "https://pritner_web_page"
# Start Chrome with certificate/SSL error flags set to ignore.
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
driver = webdriver.Chrome(options=options)
get_comment_count(driver,url)
I tried also...
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
# Start Chrome with certificate/SSL error flags set to ignore.
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
driver = webdriver.Chrome(options=options)
driver.get("http://printer_web_page")
# Switch into the frame with id 'wlmframe', then into the frame with id 'toner' inside it.
WebDriverWait(driver,5).until(EC.frame_to_be_available_and_switch_to_it((By.ID,'wlmframe')))
WebDriverWait(driver,5).until(EC.frame_to_be_available_and_switch_to_it((By.ID,'toner')))
# Dump the source of the frame that is current after both switches.
page_source=driver.page_source
print(page_source)
This is DOM Inspector of page. The various frames are dynamic and written in js as follows:
The code I wrote is just one of several different attempts to get to the frame, but to no avail

The element is within nested <frame> / <iframe> elements so you have to:
Induce WebDriverWait for the parent frame to be available and switch to it.
Induce WebDriverWait for the child frame to be available and switch to it.
Induce WebDriverWait for the desired element to be clickable.
You can use either of the following Locator Strategies:
Using CSS_SELECTOR:
driver.get("http://printer_web_page")
# Wait for the parent <frame name='wlmframe'> and switch into it.
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"frame[name='wlmframe']")))
# Then wait for the child <iframe id='toner' name='toner'> and switch into it.
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR,"iframe#toner[name='toner']")))
Using XPATH:
driver.get("http://printer_web_page")
# XPath attribute tests use "@": wait for the parent <frame name='wlmframe'> and switch into it.
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH,"//frame[@name='wlmframe']")))
# Then wait for the child <iframe id='toner' name='toner'> and switch into it.
WebDriverWait(driver, 20).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH,"//iframe[@id='toner' and @name='toner']")))
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Reference
You can find a couple of relevant discussions in:
Ways to deal with #document under iframe
Switch to an iframe through Selenium and python
How to write appropriate Xpath to locate text value
How To sign in to Applemusic With Python Using Chrome Driver With Selenium

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Extract title but give me wrong output using selenium - python

Related

Issues scraping a dynamic table displaying available and booked time slots

Clicking Accept cookies button using Selenium

Selenium click()

Getting text from a <pre> object with Selenium

Selenium web scraping iframe

Categories

Resources