Get all url in class by Selenium Python - python

I want to get all the links in the class tag like the image below.
enter image description here
for a in driver.find_elements_by_xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/div[2]/div/div[1]/div[1]/div/div'):
    url_video = a.get_property('href')
    print(url_video)
I get None as the result.
I used the 'a' tag to get all the links. I just want to get the links inside the specified class. Please help me.
This is my code:
from selenium import webdriver
import time

browser = webdriver.Chrome()
time.sleep(6)
# NOTE(review): this is the asker's original (non-working) code. The XPath
# ends on a <div>, and <div> elements carry no href attribute, so
# get_attribute('href') yields None -- the links presumably live on
# descendant <a> tags (the question text says the result is None).
elements = browser.find_elements_by_xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/div[2]/div/div[1]/div[1]/div/div')
for element in elements:
    videoUrl = element.get_attribute('href')
    print(videoUrl)
----> The result is none

.find_elements_by_xpath() returns a list; iterate over it and call element.get_attribute('href') on each element. (Selenium WebElements do not support element['href'] indexing.)
elements = driver.find_elements_by_xpath('...')
for element in elements:
    # A WebElement is not subscriptable (element['href'] raises TypeError);
    # read the attribute with get_attribute() instead.
    videoUrl = element.get_attribute('href')
    print(videoUrl)

Related

Selenium Multiple Elements Selection Python

I am trying to extract all available elements for the XPath, and I tried the method with 's' and without, but can't seem to make it work: with 'find_element' it is alright but only returns the first result, and with 'find_elements' it gives me the error: "AttributeError: 'list' object has no attribute 'find_elements'"
My code :
from selenium.webdriver.common.by import By
from selenium import webdriver

# NOTE(review): this is the asker's original code. find_elements() returns a
# list, so chaining .text onto its result raises AttributeError; the fix
# (find_element, singular, inside the loop) is discussed below.
url = 'https://automira.ro/dealeri-autorizati/lista'
PATH = 'C:\\Users\\czoca\\AppData\\Roaming\\Microsoft\\Windows\\Start Menu\\Programs\\Python 3.6\\chromedriver.exe'
driver = webdriver.Chrome(PATH)
driver.get(url)
driver.maximize_window()  # maximize the browser window
driver.implicitly_wait(100)  # implicit wait of up to 100 seconds
dealers = driver.find_elements(By.XPATH, '/html/body/div[4]/div/div[3]/div/div[1]')
for dealer in dealers:
    name = dealer.find_elements(By.XPATH, "/html/body/div[4]/div/div[3]/div/div[1]/div/div/h4/a").text
    email = dealer.find_elements(By.XPATH, '/html/body/div[4]/div/div[3]/div/div[2]/div/div/div[3]/a').text
    phone = dealer.find_elements(By.XPATH, '/html/body/div[4]/div/div[3]/div/div[2]/div/div/div[2]/a').text
    print(name, email, phone)
Any ideas?
Thanks!
find_elements method returns a list object. you can iterate on list object to get all the elements you need.
for n in name:
    print(n.text)
In your code, dealers returns a list of WebElements, so you can use find_elements. But in for loop - dealer returns only one WebElement per iteration, so you have to use find_element
dealers = driver.find_elements(By.XPATH, '/html/body/div[4]/div/div[3]/div/div[1]')
for dealer in dealers:
    # Use 'find_element' (singular) for name, email and phone: each lookup
    # here should return one WebElement, and .text exists only on a
    # WebElement, not on the list that find_elements returns.
    name = dealer.find_element(By.XPATH, "/html/body/div[4]/div/div[3]/div/div[1]/div/div/h4/a").text
    email = dealer.find_element(By.XPATH, '/html/body/div[4]/div/div[3]/div/div[2]/div/div/div[3]/a').text
    phone = dealer.find_element(By.XPATH, '/html/body/div[4]/div/div[3]/div/div[2]/div/div/div[2]/a').text
    print(name, email, phone)

Selenium cant find element

I am trying to get a specific element (Minimum Amount) with Selenium, but it returns empty
# NOTE(review): assumes Options, By, webdriver and time are imported
# elsewhere in the asker's script (snippet is quoted incomplete).
options = Options()
options.headless = True
browser = webdriver.Firefox(options=options)
browser.get('https://www.huobi.com/en-us/trade-rules/exchange')
time.sleep(5)
# Per the answer below, the page data is populated client-side from an API,
# which is presumably why .text on the matched span comes back empty here.
name = browser.find_element(by=By.CSS_SELECTOR, value='.dt-wrap+ .exchange-item span:nth-child(4)')
print(name.text) # Return Empty
How can I do it with Selenium or BeautifulSoup?
Data is also populating from external source via API. So you can easily pull all the required data whatever you need using only requests module.
Example:
import requests

# Pull the symbol list straight from the site's JSON API instead of
# scraping the rendered page.
api_url = 'https://www.huobi.com/-/x/pro/v2/beta/common/symbols?r=mhzzzd&x-b3-traceid=6c1acdfbf0a19d63cc05c62de990a55c'
req = requests.get(api_url).json()
for item in req['data']:
    print(item['display_name'])
Output:
REN/HUSD
NKN/HT
BRWL/USDT
NSURE/BTC
ITC/USDT
SPA/USDT
CTC/USDT
EVX/BTC
EUL/USDT
USTC/USDT
SUKU/USDT
KAN/BTC
NFT/USDC
LOOKS/USDT
IOI/USDT
DORA/USDT
BAT/USDT
QSP/ETH
WXT/USDT
RING/ETH
NEAR/ETH
SWFTC/BTC
LINK/HUSD
RUFF/USDT
EFI/USDT
DIO/USDT
AVAX/USDC
GSC/ETH
RAD/BTC
INSUR/USDT
NODL/USDT
H2O/USDT
BTC/HUSD
FIRO/ETH
KCASH/BTC
XPNT/USDT
STPT/BTC
XCN/USDT
ETC/BTC
OCN/ETH
BTC/EUR
MAN/BTC
OP/USDC
OXT/BTC
DASH/USDT
KSM/USDT
SD/USDT
YGG/BTC
... so on
I think your CSS_SELECTOR is wrong. Maybe do it with a list and take the element you want?
Something like:
exchange_items: list = driver.find_elements(By.XPATH, '//div[@class="exchange-item"]')
target = exchange_items[3]
print(target.text)
Here we take all the items and choose the 4th.
In that specific case use .get_attribute('innerText') or .get_attribute('innerHTML') to get your goal with selenium:
browser.find_element(By.CSS_SELECTOR, '.exchange-item span:nth-of-type(4)').get_attribute('innerText')

How to print a Selenium HTML element as text?

I'm making a Selenium project for fun. I want to see all football scores in my terminal. I use Selenium for scraping, but I cannot print the scraped element. How can I fix that?
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import os
from bs4 import BeautifulSoup
import lxml

team_both = []
team_one = []
team_two = []
team_id = []

# NOTE(review): ChromeDriverManager is used below but never imported --
# the script also needs
# `from webdriver_manager.chrome import ChromeDriverManager`.
driver = webdriver.Chrome(ChromeDriverManager().install())
time.sleep(1)
#os.system('clear')
time.sleep(1)
driver.get('https://www.bet365.de/#/IP/B1')
time.sleep(1)
try:
    driver.find_element_by_class_name('iip-IntroductoryPopup_Cross').click()
except:
    # Best-effort dismissal: the introductory popup may not be present.
    pass
time.sleep(1)
# Scroll to bottom
element = driver.find_element_by_class_name("ovm-OverviewScroller-enabled")
actions = ActionChains(driver)
actions.move_to_element(element).click_and_hold().perform()
soup = BeautifulSoup(driver.page_source, 'lxml')
time.sleep(5)
os.system('clear')
# ovm-FixtureDetailsTwoWay_TeamName
# NOTE(review): the class string below ends with a trailing space, which is
# likely why nothing matches -- TODO confirm against the live page markup.
teams = soup.find_all('div', {"class": "ovm-FixtureDetailsTwoWay_TeamName "})
for i in teams:
    print(i.text)
The simplest way to extract a text from web element is by applying the .text method, but since you are using find_all method, the team_both is not a single web element rather a list of web elements.
In order to get texts from the web elements in a list you have to iterate over the elements in the list and extract text from each element, as following:
# Note: class name without the trailing space the question's code had.
team_both = soup.find_all('div', {"class": "ovm-FixtureDetailsTwoWay_TeamName"})
for team in team_both:
    print(team.text)

Retrieve a class text python selenium and iterate all pages

I tried to retrieve the MFR number of items of EPSON from a website.
from selenium import webdriver

driver = webdriver.Chrome("C:\All\chromedriver_win32\chromedriver.exe")
driver.get('https://shop.techdata.com/searchall?b=1&kw=printer')
items = driver.find_elements_by_class_name('productDetailsLink')
for i, item in enumerate(items):
    if 'EPSON' in item.text:
        # '#class' in the scraped original is a mangled '@class'.
        MFR = item.find_element_by_xpath('.//span[@class = "darkTxt"]')
        print(i, item.text, MFR)
I retrieved 'i' correctly but something is going wrong with MFR for EPSON products only and I cannot retrieve the MFR. I am getting an error saying no element was found in MFR. Next I want to go to next page and retrieve MFR of EPSON there also. I cannot do that.
Error
Unable to locate element: {"method":"xpath","selector":".//span[@class = "darkTxt"]"}
I am a newbie so please help me!
Thanks.
The problem is that you are using the link as the parent element; if you look at the HTML code, the link doesn't have any child elements.
You need to find whole Div which contains all the information as Parent Element. which has following HTML code
<div class="productResult js-productResult unpriced needsLogin">
Following code snippet will work with just one minor change about parent element.
driver.get('https://shop.techdata.com/searchall?b=1&kw=printer')
# Search from the whole product container, not the link element.
items = driver.find_elements_by_class_name('productResult')
for i, item in enumerate(items):
    if 'EPSON' in item.text:
        MFR = item.find_element_by_xpath('.//span[@class = "darkTxt"]')
        print(i, item.text, MFR)
You can try this,
from selenium import webdriver

driver = webdriver.Chrome("C:\All\chromedriver_win32\chromedriver.exe")
driver.get('https://shop.techdata.com/searchall?b=1&kw=printer')
items = driver.find_elements_by_class_name('productResult')
for i, item in enumerate(items):
    if 'EPSON' in item.text:
        # Relative XPath (leading '.') scoped to this product container.
        MFR = item.find_element_by_xpath('.//div[@class = "productCodes"]/div[2]/span')
        print(i, item.text, MFR)
Tested on Chromium but it looks to me just a typo, no starting dot on the path to find_element_by_xpath:
MFR = item.find_element_by_xpath('//span[@class = "darkTxt"]')
print (i, item.text, MFR)

Parse BeautifulSoup element into Selenium

I want to get the source code of a website using selenium; find a particular element using BeautifulSoup; and then parse it back into selenium as a selenium.webdriver.remote.webelement object.
Like so:
driver.get("www.google.com")
soup = BeautifulSoup(driver.source)
element = soup.find(title="Search")
element = Selenium.webelement(element)
element.click()
How can I achieve this?
A general solution that worked for me is to compute the xpath of the bs4 element, then use that to find the element in selenium,
xpath = xpath_soup(soup_element)
selenium_element = driver.find_element_by_xpath(xpath)
...
import itertools


def xpath_soup(element):
    """Generate an absolute XPath expression for a BeautifulSoup element.

    :param element: bs4 text or node
    :return: xpath as string
    """
    components = []
    # Text nodes have no tag name; start from the enclosing tag instead.
    child = element if element.name else element.parent
    for parent in child.parents:
        # type parent: bs4.element.Tag
        # Siblings that precede `child` under this parent; counting those
        # with the same tag name gives the positional index for the step
        # (e.g. 'div[2]'). Index 1 is omitted, matching XPath convention.
        previous = itertools.islice(parent.children, 0, parent.contents.index(child))
        xpath_tag = child.name
        xpath_index = sum(1 for i in previous if i.name == xpath_tag) + 1
        components.append(xpath_tag if xpath_index == 1 else '%s[%d]' % (xpath_tag, xpath_index))
        child = parent
    components.reverse()
    return '/%s' % '/'.join(components)
# Demo: find an element in a BeautifulSoup snapshot, then map it back to a
# live Selenium WebElement via an attribute both representations share.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
driver = webdriver.Chrome()
driver.get("http://www.google.com")
soup = BeautifulSoup(driver.page_source, 'html.parser')
# NOTE(review): search_soup_element is assigned but never used below.
search_soup_element = soup.find(title="Search")
input_element = soup.select('input.gsfi.lst-d-f')[0]
# Re-locate the live element in Selenium using the 'name' attribute read
# from the bs4 element.
search_box = driver.find_element(by='name', value=input_element.attrs['name'])
search_box.send_keys('Hello World!')
search_box.send_keys(Keys.RETURN)
This pretty much works. I can see reason for working with both webdriver and BeautifulSoup but not necessarily for this example.
I don't know of any way to pass from bs4 to selenium but you can just use selenium to find the element:
driver.find_element_by_xpath('//input[#title="Search"]').click()
Or to find using just the title text like your bs4 find:
driver.find_element_by_xpath('//*[#title="Search"]').click()

Categories