I am trying to retrieve the MFR (manufacturer) number of EPSON items from a website.
from selenium import webdriver

driver = webdriver.Chrome(r"C:\All\chromedriver_win32\chromedriver.exe")
driver.get('https://shop.techdata.com/searchall?b=1&kw=printer')
items = driver.find_elements_by_class_name('productDetailsLink')
for i, item in enumerate(items):
    if 'EPSON' in item.text:
        MFR = item.find_element_by_xpath('.//span[@class = "darkTxt"]')
        print(i, item.text, MFR)
I retrieve i correctly, but something goes wrong with MFR for the EPSON products only, and I cannot retrieve it: I get an error saying no element was found. Next I want to go to the next page and retrieve the MFR of the EPSON items there as well, but I cannot do that either.
Error
Unable to locate element: {"method":"xpath","selector":".//span[@class = "darkTxt"]"}
I am a newbie so please help me!
Thanks.
The problem is that you are using the link as the parent element; if you look at the HTML, the link doesn't have any child elements.
You need to use the whole div that contains all the information as the parent element, which has the following HTML:
<div class="productResult js-productResult unpriced needsLogin">
The following code snippet will work with just one minor change to the parent element:
driver.get('https://shop.techdata.com/searchall?b=1&kw=printer')
items = driver.find_elements_by_class_name('productResult')
for i, item in enumerate(items):
    if 'EPSON' in item.text:
        MFR = item.find_element_by_xpath('.//span[@class = "darkTxt"]')
        # MFR is a WebElement; print its .text to see the actual code
        print(i, item.text, MFR.text)
You can try this:
from selenium import webdriver

driver = webdriver.Chrome(r"C:\All\chromedriver_win32\chromedriver.exe")
driver.get('https://shop.techdata.com/searchall?b=1&kw=printer')
items = driver.find_elements_by_class_name('productResult')
for i, item in enumerate(items):
    if 'EPSON' in item.text:
        MFR = item.find_element_by_xpath('.//div[@class = "productCodes"]/div[2]/span')
        # print the element's text rather than the WebElement object
        print(i, item.text, MFR.text)
Tested on Chromium, but it looks to me like just a typo: drop the starting dot in the XPath passed to find_element_by_xpath:
MFR = item.find_element_by_xpath('//span[@class = "darkTxt"]')
print(i, item.text, MFR)
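One caveat: without the leading dot, the XPath searches the entire document, so every iteration will match the first darkTxt span on the page rather than the one inside item. The relative search against the productResult container shown above is the more reliable fix.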
I want to get all the links that are inside a specific class.
for a in driver.find_elements_by_xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/div[2]/div/div[1]/div[1]/div/div'):
    url_video = a.get_property('href')
    print(url_video)
I get None as the result.
I used the 'a' tag to get all the links; I just want the links inside the specified class. Please help me.
This is my code:
from selenium import webdriver
import time

browser = webdriver.Chrome()
time.sleep(6)
elements = browser.find_elements_by_xpath('/html/body/div[2]/div[2]/div[2]/div/div[2]/div[2]/div/div[1]/div[1]/div/div')
for element in elements:
    videoUrl = element.get_attribute('href')
    print(videoUrl)
----> The result is None
.find_elements_by_xpath() returns a list, but the real problem is that your XPath points at <div> elements, and a <div> has no href attribute, so get_attribute('href') returns None. Find the <a> descendants of each element and read href from those:
elements = driver.find_elements_by_xpath('...')
for element in elements:
    for a in element.find_elements_by_tag_name('a'):
        print(a.get_attribute('href'))
What I mean is that the website I'm using has two drop-down menus named province with the exact same id. How do I tell Python which drop-down in particular I want to select? This assumes the issue is that Python always picks the first matching id it sees.
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time

# web is the WebDriver instance created earlier (e.g. web = webdriver.Firefox())
# There are two drop-down menus with the same XPath. The first time it works fine;
# the second time it throws an error about the element not being interactable.
Prov = Select(web.find_element_by_xpath('//*[@id="province"]'))
Prov.select_by_index(2)

def Start():
    # once opened it will fill in the "confirm your age" form
    Day = Select(web.find_element_by_xpath('//*[@id="bday_day"]'))
    Day.select_by_index(2)
    Month = Select(web.find_element_by_xpath('//*[@id="bday_month"]'))
    Month.select_by_index(4)
    Year = Select(web.find_element_by_xpath('//*[@id="bday_year"]'))
    Year.select_by_index(24)
    Prov = Select(web.find_element_by_xpath('//*[@id="province"]'))
    Prov.select_by_index(5)
    Button = web.find_element_by_xpath('//*[@id="popup-subscribe"]/button')
    Button.click()

# have to go through "select your birthday"
Start()
# 2 seconds is enough for the website to load
time.sleep(2)
# this throws an error
Prov = Select(web.find_element_by_xpath('//*[@id="province"]'))
Prov.select_by_index(5)
Selenium has two families of functions:
find_element_by_... (without an s in element) to get only the first matching element
find_elements_by_... (with an s in elements) to get all matching elements
Selenium docs: 4. Locating Elements
So you can get all the elements as a list (even if there is only one element in the HTML; if there are no elements, you get an empty list):
elements = web.find_elements_by_xpath('//*[@id="province"]')
and later index or slice it:
first = elements[0]
second = elements[1]
last = elements[-1]
list_first_and_second = elements[:2]
EDIT:
You can also try to index directly in the XPath (it starts counting at one, not zero), like
'//*[@id="province"][2]'
or maybe
'(//*[@id="province"])[2]'
but I have never used this, so I can't confirm that it will work.
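For what it's worth, a minimal sketch of the parenthesized form, which applies the [2] to the whole document-wide result set rather than per parent context:
# the parentheses make [2] select the second match in the entire document
second_province = web.find_element_by_xpath('(//*[@id="province"])[2]')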
BTW:
All IDs should be unique - you shouldn't duplicate IDs.
If you check the documentation, 4. Locating Elements, you will see that it lists find_element_by_id (without an s in element) to get the first and, in principle, only element with a given ID - but it lists no find_elements_by_id (with an s in elements) to get more than one element with the same ID.
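That said, the generic find_elements method accepts any locator strategy, including By.ID, so elements with a duplicated id can still be collected; a minimal sketch:
from selenium.webdriver.common.by import By

# find_elements with By.ID returns every element carrying that id,
# even though duplicated ids are invalid HTML
elements = web.find_elements(By.ID, 'province')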
EDIT:
Here is minimal working code, with the example HTML embedded in the script:
from selenium import webdriver
from selenium.webdriver.support.ui import Select

html = '''
<select id="province">
    <option value="value_A">A</option>
    <option value="value_B">B</option>
</select>
<select id="province">
    <option value="value_1">1</option>
    <option value="value_2">2</option>
</select>
'''

driver = webdriver.Firefox()
driver.get("data:text/html;charset=utf-8," + html)

all_elements = driver.find_elements_by_xpath('//*[@id="province"]')
first = all_elements[0]
second = all_elements[1]

prov1 = Select(first)
prov2 = Select(second)

print('--- first ---')
for item in prov1.options:
    print('option:', item.text, item.get_attribute('value'))
for item in prov1.all_selected_options:
    print('selected:', item.text, item.get_attribute('value'))

print('--- second ---')
for item in prov2.options:
    print('option:', item.text, item.get_attribute('value'))
for item in prov2.all_selected_options:
    print('selected:', item.text, item.get_attribute('value'))
EDIT:
There are two province drop-downs.
When you use find_element in Start, you get the first province, the one in the popup, and you can fill it in. When you click the button, the popup closes, but that doesn't remove the first province from the HTML - it only hides it.
Later, when you use find_element again, you again get the first province, in the now-hidden popup. This time it is not visible and cannot be interacted with, which raises the error. You have to use the second province, as in this example:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time

def Start():
    # once opened it will fill in the "confirm your age" form
    Day = Select(web.find_element_by_xpath('//*[@id="bday_day"]'))
    Day.select_by_index(2)
    Month = Select(web.find_element_by_xpath('//*[@id="bday_month"]'))
    Month.select_by_index(4)
    Year = Select(web.find_element_by_xpath('//*[@id="bday_year"]'))
    Year.select_by_index(24)
    # it uses the first `province`
    Prov = Select(web.find_element_by_xpath('//*[@id="province"]'))
    Prov.select_by_index(5)
    Button = web.find_element_by_xpath('//*[@id="popup-subscribe"]/button')
    Button.click()

web = webdriver.Firefox()
web.get('https://www.tastyrewards.com/en-ca/contest/fritolaycontest/participate')

# have to go through "select your birthday"
Start()
# 2 seconds is enough for the website to load
time.sleep(2)

# `find_elements` with `s` - to get the second `province`
all_province = web.find_elements_by_xpath('//*[@id="province"]')
second_province = all_province[1]
Prov = Select(second_province)
Prov.select_by_index(5)
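As a small robustness note, the fixed time.sleep(2) could be replaced with an explicit wait; a minimal sketch, assuming the same page:
from selenium.webdriver.support.ui import WebDriverWait

# wait until at least two `province` elements are present instead of sleeping
WebDriverWait(web, 10).until(
    lambda d: len(d.find_elements_by_xpath('//*[@id="province"]')) > 1
)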
I'm trying to get the title of each element in my for loop, but the result is a list containing only the first element's title, so the loop is not working. How can I fix it? I'm not sure what's causing this.
This is the code:
titles = []
wait = WebDriverWait(driver, 30)
elem = wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "thumbImage.loaded")))
for e in elem:
    ActionChains(driver).move_to_element(e).click().perform()
    e_title = wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'titleOfElement')))
    titles.append(e_title.text)
This is the HTML of the title:
<h2 id="detailsModal" class="titleOfElement"> title </h2>
The issue might be that wait.until(EC.visibility_of_element_located((By.CLASS_NAME, 'titleOfElement'))) returns the first element it finds on every pass through for e in elem:.
I am assuming you instead want to select the elements under the node e in each iteration. For that you can use a relative path reference, ./ .
Without access to the site you are working on I am blind here, but the code below may get you in the right direction.
titles = []
wait = WebDriverWait(driver, 30)
elem = wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "thumbImage.loaded")))
for e in elem:
    ActionChains(driver).move_to_element(e).click().perform()
    # wait for the title to become visible, then locate it relative to e
    wait.until(EC.visibility_of_element_located((By.XPATH, "//h2[@class='titleOfElement']")))
    e_title = e.find_element_by_xpath(".//h2[@class='titleOfElement']")
    titles.append(e_title.text)
I am scraping Amazon products, but first I want to click on each category. The code works only with the first category in the loop and then throws the error below. I searched for answers and found many, but they didn't work inside the loop, and all of them work with an XPath to one element, not several.
The first click (see_more) works; the problem is with the clicks inside the loop.
ERROR:
selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=80.0.3987.149)
Here is the code.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.common.exceptions import ElementClickInterceptedException
from bs4 import BeautifulSoup
from csv import writer
import time

driver = webdriver.Chrome(executable_path='C:\\Users\\Compu City\\Desktop\\chromedriver.exe')
driver.get('https://www.amazon.com/international-sales-offers/b/?ie=UTF8&node=15529609011&ref_=nav_navm_intl_deal_btn')
time.sleep(10)

# (the second argument is passed to the script as an argument; it is not executed)
res = driver.execute_script("return document.documentElement.outerHTML", 'window.scrollBy(0,2000)')
soup = BeautifulSoup(res, 'lxml')

filter_con = driver.find_element_by_id('widgetFilters')  # main container of the category filters
cats = driver.find_elements_by_css_selector('.a-expander-container .a-checkbox label .a-label')
see_more = driver.find_element_by_css_selector('#widgetFilters > div:nth-child(1) > div.a-row.a-expander-container.a-expander-inline-container > a > span')
ActionChains(driver).move_to_element(filter_con).click(see_more).perform()

cat = 0
while cat < len(cats):
    print(cat)
    print(cats[cat].text)
    ActionChains(driver).move_to_element(filter_con).click(cats[cat]).perform()
    cat += 1
The moment you click on a cat element, the page updates and the elements behind the references in cats are replaced. Since you are still pointing to the old references, you get a StaleElementReferenceException. Update your code as below.
Option 1: Fixing the existing code
while cat < len(cats):
    # re-locate the element on every iteration so the reference is fresh
    currentCat = driver.find_elements_by_css_selector('.a-expander-container .a-checkbox label .a-label')[cat]
    print(cat)
    print(currentCat.text)
    ActionChains(driver).move_to_element(filter_con).click(currentCat).perform()
    cat += 1
Option 2: Using a for loop (without action chains)
for catNumber in range(len(cats)):
    # re-locate on each pass to avoid stale references
    cat = driver.find_elements_by_css_selector('.a-expander-container .a-checkbox label .a-label')[catNumber]
    print(catNumber)
    # scroll to the element
    cat.location_once_scrolled_into_view
    # click
    cat.click()
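As a side note, location_once_scrolled_into_view is a property, so merely reading it scrolls the element into view; that is why the bare expression on its own line has an effect.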
I'm scraping this website using Python and Selenium. The code works, but it currently only scrapes the first page. I would like to iterate through all the pages and scrape them all, but they handle pagination in a weird way. How would I go through the pages and scrape them one by one?
Pagination HTML:
<div class="pagination">
    <a>First</a>
    <a>Prev</a>
    <a>1</a>
    <span class="current">2</span>
    <a>3</a>
    <a>4</a>
    <a>Next</a>
    <a>Last</a>
</div>
Scraper:
import re
import json
import requests
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options
options = Options()
# options.add_argument('--headless')
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver = webdriver.Chrome(chrome_options=options,
                          executable_path=r'/Users/weaabduljamac/Downloads/chromedriver')
url = 'https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList'
driver.get(url)
def getData():
    data = []
    rows = driver.find_element_by_xpath('//*[@id="form1"]/table/tbody').find_elements_by_tag_name('tr')
    for row in rows:
        app_number = row.find_elements_by_tag_name('td')[1].text
        address = row.find_elements_by_tag_name('td')[2].text
        proposals = row.find_elements_by_tag_name('td')[3].text
        status = row.find_elements_by_tag_name('td')[4].text
        data.append({"CaseRef": app_number, "address": address, "proposals": proposals, "status": status})
    print(data)
    return data
def main():
    all_data = []
    select = Select(driver.find_element_by_xpath("//select[@class='formitem' and @id='selWeek']"))
    list_options = select.options
    for item in range(len(list_options)):
        select = Select(driver.find_element_by_xpath("//select[@class='formitem' and @id='selWeek']"))
        select.select_by_index(str(item))
        driver.find_element_by_css_selector("input.formbutton#csbtnSearch").click()
        all_data.extend(getData())
        driver.find_element_by_xpath('//*[@id="form1"]/div[3]/a[4]').click()
        driver.get(url)
    with open('wiltshire.json', 'w+') as f:
        json.dump(all_data, f)
    driver.quit()

if __name__ == "__main__":
    main()
Before moving on to automating any scenario, always write down the manual steps you would perform to execute it. The manual steps for what you want to do (as I understand the question) are:
1) Go to site - https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList
2) Select first week option
3) Click search
4) Get the data from every page
5) Load the url again
6) Select second week option
7) Click search
8) Get the data from every page
.. and so on.
You have a loop to select the different weeks, but inside each iteration for a week option you also need a loop that iterates over all the result pages. Since you are not doing that, your code returns only the data from the first page.
Another problem is with how you are locating the 'Next' button:
driver.find_element_by_xpath('//*[@id="form1"]/div[3]/a[4]').click()
You are selecting the 4th <a> element, which is of course not robust, because on different pages the Next button's index will differ. Instead, use this better locator:
driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
Logic for creating the loop which will iterate through the pages:
First you will need the number of pages. I got that by locating the <a> immediately before the "Next" button; as a screenshot of the pagination bar makes clear, the text of that element equals the number of pages. I did that using the following code:
number_of_pages = int(driver.find_element_by_xpath("//a[contains(text(),'Next')]/preceding-sibling::a[1]").text)
Now, once you have the number of pages in number_of_pages, you only need to click the "Next" button number_of_pages - 1 times!
Final code for your main function (note that it uses time.sleep, so add import time at the top):
def main():
    all_data = []
    select = Select(driver.find_element_by_xpath("//select[@class='formitem' and @id='selWeek']"))
    list_options = select.options
    for item in range(len(list_options)):
        select = Select(driver.find_element_by_xpath("//select[@class='formitem' and @id='selWeek']"))
        select.select_by_index(str(item))
        driver.find_element_by_css_selector("input.formbutton#csbtnSearch").click()
        number_of_pages = int(driver.find_element_by_xpath("//a[contains(text(),'Next')]/preceding-sibling::a[1]").text)
        for j in range(number_of_pages - 1):
            all_data.extend(getData())
            driver.find_element_by_xpath("//a[contains(text(),'Next')]").click()
            time.sleep(1)
        # scrape the last page as well before reloading the search form
        all_data.extend(getData())
        driver.get(url)
    with open('wiltshire.json', 'w+') as f:
        json.dump(all_data, f)
    driver.quit()
The following approach simply worked for me.
driver.find_element_by_link_text("3").click()
driver.find_element_by_link_text("4").click()
....
driver.find_element_by_link_text("Next").click()
First, get the total number of pages in the pagination, using:
ins.get('https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList/10702380,1')
ins.find_element_by_class_name("pagination")
source = BeautifulSoup(ins.page_source)
div = source.find_all('div', {'class': 'pagination'})
all_as = div[0].find_all('a')
total = 0
for i in range(len(all_as)):
    if 'Next' in all_as[i].text:
        # the link immediately before "Next" holds the page count
        total = int(all_as[i-1].text)
        break
Now just loop through that range:
for count in range(1, total + 1):
    ins.get('https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList/10702380,{}'.format(count))
Keep incrementing the count, get the source code for each page, and then extract the data from it.
Note: don't forget the sleep when going from one page to another.
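Putting it together, a minimal sketch of that loop, assuming ins is the WebDriver from above and a hypothetical parse_rows() helper that extracts the table data from the parsed page:
import time
from bs4 import BeautifulSoup

all_data = []
for count in range(1, total + 1):
    ins.get('https://services.wiltshire.gov.uk/PlanningGIS/LLPG/WeeklyList/10702380,{}'.format(count))
    time.sleep(1)  # give the page time to load between requests
    source = BeautifulSoup(ins.page_source, 'lxml')
    # parse_rows() is a hypothetical helper that pulls the table rows out of the soup
    all_data.extend(parse_rows(source))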