How do I retrieve the link of an image through Selenium - python

I'm trying to make my program fetch the link of an image and then store it as a string in a variable.
This is the xpath of the image. I need to do it through xpaths because the xpaths on the website are very similar bar the "/article[x]". This allow me to increase the number with a variable so that I can go through all the xpaths on the page.
/html/body/div[2]/div[2]/div[3]/div[2]/div[2]/div[1]/div/article[1]/div[2]/div[1]/a/img
Picture of the website that I'm trying to retrieve the links of the image
My code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import tkinter
import time
Anime = input("Enter Anime:")
driver = webdriver.Chrome(executable_path=r"C:\Users\amete\Documents\chromedriver.exe")
driver.get("https://myanimelist.net/search/all?q=one%20piece&cat=all")
search = driver.find_element_by_xpath('//input[#name="q"]')
wait = WebDriverWait(driver, 20)
wait.until(EC.element_to_be_clickable((By.XPATH, '//input[#name="q"]')))
# Clears the field
search.send_keys(Keys.CONTROL, 'a')
search.send_keys(Keys.DELETE)
# The field is now cleared and the program can type whatever it wants
search.send_keys(Anime)
search.send_keys(Keys.RETURN)
# Accept the cookies
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[#id="qc-cmp2-ui"]/div[2]/div/button[3]'))).click()
# Added this wait
wait.until(EC.element_to_be_clickable((By.XPATH,'//h2[#id="anime"]//ancestor::div[#class="content-left"]//article[1]/div[contains(#class, "list")][1]/div[contains(#class, "information")]/a[1]')))
link = driver.find_element_by_xpath('//h2[#id="anime"]//ancestor::div[#class="content-left"]//article[1]/div[contains(#class, "list")][1]/div[contains(#class, "information")]/a[1]').text
piclink = driver.('/html/body/div[2]/div[2]/div[3]/div[2]/div[2]/div[1]/div/article[1]/div[2]/div[1]/a/img')
print (piclink)

you can get it like this (specify the attribute)
piclink = driver.find_element_by_xpath('/html/body/div[2]/div[2]/div[3]/div[2]/div[2]/div[1]/div/article[1]/div[2]/div[1]/a/img').get_attribute('src')
print(piclink)

Related

Use Python Selenium to extract span text

Hi i'm new at selenium and webscraping and i need some help.
i try to scrape one site and i need and i dont know how to get span class.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
PATCH = "/Users/bobo/Downloads/chromedriver"
driver = webdriver.Chrome(PATCH)
driver.get("https://neonet.pl")
print(driver.title)
search = driver.find_element_by_class_name("inputCss-input__label-263")
search.send_keys(Keys.RETURN)
time.sleep(5)
i try to extract this span
<span class="inputCss-input__label-263">Szukaj produktu</span>
I can see that you are trying to search something in the search bar.
First I recommend you to use the xpath instead of the class name, here is a simple technique to get the xpath of every element on a webpage:
right-click/ inspect element/ select the mouse in a box element on the upper left/ click on the element on the webpage/ it will directly show you the corresponding html/ then right click on the selected html/ copy options and then xpath.
Here is a code example that searches an element on the webpage, I also included the 'Webdriver-wait' option because sometimes the code runs to fast and can't find the next element so this function make the code wait till the element is visible:
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome(executable_path="/Users/bobo/Downloads/chromedriver")
driver.get("https://neonet.pl") #loading page
wait = WebDriverWait(driver, 20) #defining webdriver wait
search_word = 'iphone\n' # \n is going to act as an enter key
wait.until(EC.visibility_of_element_located((By.XPATH, '//*[#id="root"]/main/div[1]/div[4]/div/div/div[2]/div/button[1]'))).click() #clicking on cookies popup
wait.until(EC.visibility_of_element_located((By.XPATH, '//*[#id="root"]/main/header/div[2]/button'))).click() #clicking on search button
wait.until(EC.visibility_of_element_located((By.XPATH, '//*[#id="root"]/aside[2]/section/form/label/input'))).send_keys(search_word) #searching on input button
print('done!')
sleep(10)
Hope this helped you!
wait=WebDriverWait(driver,10)
driver.get('https://neonet.pl')
elem=wait.until(EC.visibility_of_element_located((By.XPATH, "//span[contains(#class,'inputCss-input')]"))).text
print(elem)
To output the value of the search bar you use .text on the Selenium Webelement.
Imports:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

Selenium using WebDriverWait WebElement still doesn't print any text, recognizes string PYTHON

As title. Couldn't find a solution online. You'll see by the print statements that the program will get the text property from the WebElement object, but it is always an empty string even though I am using WebDriverWait.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
PATH = "C:\\Users\\Anthony\\Desktop\\chromedriver.exe"
driver = webdriver.Chrome(PATH)
website = "https://www.magicspoiler.com"
driver.get(website)
el_name = '//div[#class="set-card-2 pad5"]'
try:
main = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, el_name)))
articles = main.find_elements(By.XPATH, el_name)
for article in articles:
print(type(article.text))
print(article.text)
finally:
driver.quit()
I checked the HTML DOM and I don't see any text in the element you are looking for. They are simply images with anchor links and therefore you are getting blank responses.
I have given an alternate try, to extract the links and they were successful.
So, after refactoring your code, it looks like this:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
PATH = "C:\\Users\\Anthony\\Desktop\\chromedriver.exe"
driver = webdriver.Chrome(PATH)
website = "https://www.magicspoiler.com"
driver.get(website)
el_name = '//div[#class="set-card-2 pad5"]/a'
try:
main = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, el_name)))
articles = main.find_elements(By.XPATH, el_name)
for article in articles:
# print(type(article.text))
print(article.get_attribute("href"))
finally:
pass
driver.quit()
Here is the response I got:
So, please check if you want to get the links or really the text. If you want the text, then there is none that I see in DOM as I said. You may have to traverse through each of the link by clicking on it, and find if any you need fro the navigated page.

Python web scraping using Selenium - iterate through href link

I am trying to write a script that uses selenium to download many files which consist of different NHL players information; game-log. I want to download a file for each players in the following table: https://www.naturalstattrick.com/playerteams.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&score=all&stdoi=std&rate=y&team=ALL&pos=S&loc=B&toi=0.1&gpfilt=none&fd=&td=&tgp=410&lines=single
Once on that website, I wanted to click on all the players' name in the table. When a player's name is clicked through the href link, a new window opens. There are few drop-down menus at the top. I want to select "Rate" instead of "Counts" and also select " Game Log" instead of "Player Summary", and then click "Submit". Finally, I want to click on CSV(All) at the bottom to download a CSV file.
Here is my current code:
from selenium import webdriver
import csv
from selenium.webdriver.support.ui import Select
from datetime import date, timedelta
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chromedriver =("C:/Users/Michel/Desktop/python/package/chromedriver_win32/chromedriver.exe")
driver = webdriver.Chrome(chromedriver)
driver.get("https://www.naturalstattrick.com/playerteams.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&score=all&stdoi=std&rate=y&team=ALL&pos=S&loc=B&toi=0.1&gpfilt=none&fd=&td=&tgp=410&lines=single")
table = driver.find_element_by_xpath("//table[#class='indreg dataTable no-footer DTFC_Cloned']")
for row in table.find_elements_by_xpath("//tr[#role='row']")
links = driver.find_element_by_xpath('//a[#href]')
links.click()
select = Select(driver.find_element_by_name('rate'))
select.select_by_value("y")
select1 = Select(driver.find_element_by_name('v'))
select1.select_by_value("g")
select2 = Select(driver.find_element_by_type('submit'))
select2.select_by_value("submit")
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH , '//div[#class="dt-button button-csv button-htm15"]')))
CSVall = driver.find_element_by_xpath('//div[#class="dt-button button-csv button-htm15"]')
CSVall.click()
driver.close()
I try to change different things, but I always get an error. Where is the problem ?
Moreover, I think I should probably add a line to wait for the website to load because it takes a few seconds; after "driver.get". I do not know what should be the expected conditions to end the wait in this case.
Thanks
Rather than keep clicking through selections you could grab the playerIds from the first page and concantenate those, along with the strings representing the selections for Rate and Game Log into the queryString part of the new URL. Sure you can tidy up the following.
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def getPlayerId(url):
id = url.split('playerid=')[1]
id = id.split('&')[0]
return id
def makeNewURL(playerId):
return 'https://www.naturalstattrick.com/playerreport.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&stdoi=oi&rate=y&v=g&playerid=' + playerId
#chromedriver =("C:/Users/Michel/Desktop/python/package/chromedriver_win32/chromedriver.exe")
driver = webdriver.Chrome()
driver.get("https://www.naturalstattrick.com/playerteams.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&score=all&stdoi=std&rate=y&team=ALL&pos=S&loc=B&toi=0.1&gpfilt=none&fd=&td=&tgp=410&lines=single")
links = driver.find_elements_by_css_selector('table.indreg.dataTable.no-footer.DTFC_Cloned [href*=playerid]')
newLinks = []
for link in links:
newLinks.append(link.get_attribute('href'))
for link in newLinks:
playerId = getPlayerId(link)
link = makeNewURL(playerId)
driver.get(link)
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH , '//a[#class="dt-button buttons-csv buttons-html5"][2]')))
CSVall = driver.find_element_by_xpath('//a[#class="dt-button buttons-csv buttons-html5"][2]')
CSVall.click()
you don't need to click each player link but save the URLs as list, also there are several error, you can see working code below
from selenium import webdriver
import csv
from selenium.webdriver.support.ui import Select
from datetime import date, timedelta
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
chromedriver =("C:/Users/Michel/Desktop/python/package/chromedriver_win32/chromedriver.exe")
driver = webdriver.Chrome(chromedriver)
driver.get("https://www.naturalstattrick.com/playerteams.php?fromseason=20142015&thruseason=20162017&stype=2&sit=all&score=all&stdoi=std&rate=y&team=ALL&pos=S&loc=B&toi=0.1&gpfilt=none&fd=&td=&tgp=410&lines=single")
playerLinks = driver.find_elements_by_xpath("//table[#class='indreg dataTable no-footer DTFC_Cloned']//a")
playerLinks = [p.get_attribute('href') for p in playerLinks]
print(len(playerLinks))
for url in playerLinks:
driver.get(url)
select = Select(driver.find_element_by_name('rate'))
select.select_by_value("y")
select1 = Select(driver.find_element_by_name('v'))
select1.select_by_value("g")
driver.find_element_by_css_selector('input[type="submit"]').click()
WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH , '//a[#class="dt-button buttons-csv buttons-html5"][2]')))
CSVall = driver.find_element_by_xpath('//a[#class="dt-button buttons-csv buttons-html5"][2]')
CSVall.click()
driver.close()

My scraper fails to get all the items from a webpage

I've written some code in python in combination with selenium to parse different product names from a webpage. There are few load more buttons visible if the browser is made to scroll downward. The webpage displays it's full content if the page is made to scroll downmost until there is no load more button to click. My scraper seems to be doing good but I'm not getting all the results. There are around 200 products in that page but I'm getting 90 out of them. What change should I bring about in my scraper to get them all? Thanks in advance.
The webpage I'm dealing with: Page_Link
This is the script I'm trying with:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("put_above_url_here")
wait = WebDriverWait(driver, 10)
page = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,".listing_item")))
for scroll in range(17):
page.send_keys(Keys.PAGE_DOWN)
time.sleep(2)
try:
load = driver.find_element_by_css_selector(".lm-btm")
load.click()
except Exception:
pass
for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[id^=item_]"))):
name = item.find_element_by_css_selector(".pro-name.el2").text
print(name)
driver.quit()
Try below code to get required data:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://www.purplle.com/search?q=hair%20fall%20shamboo")
wait = WebDriverWait(driver, 10)
header = driver.find_element_by_tag_name("header")
driver.execute_script("arguments[0].style.display='none';", header)
while True:
try:
page = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".listing_item")))
driver.execute_script("arguments[0].scrollIntoView();", page)
page.send_keys(Keys.END)
load = wait.until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "LOAD MORE")))
driver.execute_script("arguments[0].scrollIntoView();", load)
load.click()
wait.until(EC.staleness_of(load))
except:
break
for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[id^=item_]"))):
name = item.find_element_by_css_selector(".pro-name.el2").text
print(name)
driver.quit()
You should only Use Selenium as a last resort.
A simple look around in the webpage showed the API it called to get your data.
It returns a JSON output with all the details:
Link
You can now just loop over and store in a dataframe easily.
Very fast, fewer errors than selenium.

Unable to submit keys using selenium with python

Following is the code which im trying to run
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import os
import time
#Create a new firefox session
browser=webdriver.Firefox()
browser.maximize_window()
#navigate to app's homepage
browser.get('http://demo.magentocommerce.com/')
#get searchbox and clear and enter details.
browser.find_element_by_css_selector("a[href='/search']").click()
search=browser.find_element_by_class_name('search-input')
search.click()
time.sleep(5)
search.click()
search.send_keys('phones'+Keys.RETURN)
However, im unable to submit the phones using send_keys.
Am i going wrong somewhere?
Secondly is it possible to always use x-path to locate an element and not rely on id/class/css-selections etc ?
The input element you are interested in has the search_query class name. To make it work without using hardcoded time.sleep() delays, use an Explicit Wait and wait for the search input element to be visible before sending keys to it. Working code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Firefox()
browser.maximize_window()
wait = WebDriverWait(browser, 10)
browser.get('http://demo.magentocommerce.com/')
browser.find_element_by_css_selector("a[href='/search']").click()
search = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "search-query")))
search.send_keys("phones" + Keys.RETURN)

Categories