The question is about Selenium WebDriver's explicit wait feature, which is the same locating action but with a timeout: WebDriver waits for the element to be present or visible. In my situation I locate elements both with an explicit wait and without one. Without the explicit wait, the element is found and assigned. But below, when I also try to locate the same element with an explicit wait, that variable stays unassigned, is passed in that state to the for loop, and the loop raises UnboundLocalError.
I know that error is Python-based, not Selenium-based. I can get rid of it by using y_priceNewProds = x_priceNewProds instead of pass, but I need to understand why WebDriver cannot locate the element with a wait when the line above already found it. There may also be cookies involved.
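The UnboundLocalError itself can be reproduced without Selenium at all. A minimal sketch in plain Python (the function names are made up for illustration): if the call that should assign the variable raises, and a bare except swallows the exception, the name is never bound and the loop below blows up.

def wait_that_times_out():
    # stands in for wait.until(...) raising Selenium's TimeoutException
    raise TimeoutError("element never became visible")

def parse():
    try:
        items = wait_that_times_out()  # raises, so 'items' is never assigned
    except:
        pass                           # the exception is silently swallowed
    for item in items:                 # UnboundLocalError: local variable 'items' referenced before assignment
        print(item)

parse()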
My code is below:
import pandas as pd
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# reading the URLs from the csv file
def readCSV(path_csv):
    df = pd.read_csv(path_csv)
    return df

fileCSV = readCSV(r'C:\Users\Admin\Downloads\urls.csv')
length_of_column_urls = fileCSV['linkamazon'].last_valid_index()

def create_driver():
    chrome_options = Options()
    chrome_options.headless = True
    chrome_options.add_argument("start-maximized")
    # chrome_options.add_experimental_option("detach", True)
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_argument('--disable-blink-features=AutomationControlled')
    webdriver_service = Service(r'C:\pythonPro\w_crawl\AmznScrpBot\chromedriver.exe')
    driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
    return driver
# going to the URLs one by one
def goToUrl_Se(driver):
    global counter
    counter = 0
    for i in range(0, length_of_column_urls + 1):
        xUrl = fileCSV.iloc[i, 1]
        print(xUrl, i)
        # going to the url (Amazon) via Selenium WebDriver
        driver.get(xUrl)
        parse_data()
        counter += 1
    driver.quit()
# fetch and parse the data from the url's page
def parse_data():
    global asin, title, bookform, priceNewProd
    wait = WebDriverWait(driver, timeout=20)
    try:
        # trying code snippets where elements are located by XPATH; for some of them
        # there is also a WebDriver explicit wait, so the same element is located both
        # with a presence_of/visibility_of wait method and without an explicit wait,
        # just to understand why elements sometimes cannot be located on the page
        y_data_index = wait.until(EC.presence_of_element_located((By.XPATH, "//div[@data-index='1']")))
        print(y_data_index.text, 'y_index')
        x_data_index = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[@data-index='1']")))
        print(x_data_index.text, 'x_index')
        x_titles = driver.find_elements(By.XPATH, '//span[contains(@class,"a-size-medium")]')
        x_bookforms = driver.find_elements(By.XPATH, '//div[contains(@class,"a-section a-spacing-none a-spacing-top-micro s-price-instructions-style")]//div[contains(@class,"a-row a-size-base a-color-base")]//a[contains(@class,"a-size-base a-link-normal puis-medium-weight-text s-underline-text s-underline-link-text s-link-style s-link-centralized-style a-text-bold")]')
        x_priceNewProds = driver.find_elements(By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')
        y_priceNewProds = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
    except:
        pass
    for i in range(len(x_titles)):
        x_title = x_titles[i].text
        x_priceNewProd = x_priceNewProds[i].text
        print(x_priceNewProd)
        y_priceNewProd = y_priceNewProds[i].text
        print(y_priceNewProd)
        try:
            x_bookform = x_bookforms[i].text
            print(x_bookform)
        except:
            x_bookform = '-'
        title = x_title
        bookform = x_bookform
        priceNewProd = x_priceNewProd
        write_to_csv()
def write_to_csv():
    if counter == 0:
        df = pd.DataFrame({'Products': title, 'Bookform': bookform, 'Price': priceNewProd}, index=[0])
        wr = df.to_csv('results_new00.csv', index=False, mode='w')
    else:
        df = pd.DataFrame({'Products': title, 'Bookform': bookform, 'Price': priceNewProd}, index=[0])
        wr = df.to_csv('results_new00.csv', index=False, mode='a', header=False)

driver = create_driver()
goToUrl_Se(driver)
    y_priceNewProd = y_priceNewProds[i].text
UnboundLocalError: local variable 'y_priceNewProds' referenced before assignment
Since you didn't share all the details, including the link, we can't know exactly what happens there, but I guess that this line
wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
throws an exception because those elements are not visible.
This is why y_priceNewProds is never assigned a value.
A plain driver.find_elements call, on the other hand, does find those elements, since they exist in the DOM, but they are, again, not visible.
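Roughly, the difference between the two conditions looks like this (a sketch, assuming the same wait object as in your code):

# presence_of_element_located: the node exists in the DOM, visible or not
present = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.a-offscreen")))

# visibility_of_element_located: the node exists AND is displayed (has a
# non-zero size and is not hidden by CSS). Amazon's "a-offscreen" spans are
# deliberately positioned off-screen, so a visibility wait on them times out.
visible = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "span.a-offscreen")))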
UPD
After you shared the link I tested that and now I can say: Yes, I was right!
Those elements are not visible.
The following code throws TimeoutException
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.amazon.com/s?i=stripbooks&bbn=1&rh=n%3A1%2Cp_n_feature_eighteen_browse-bin%3A8622846011&dc&fs=true&ds=v1%3AgAO0%2BZc%2BC6RthRkqyWzOHmDVufv7JbuCK96Grvjle68&qid=1665559431&rnid=8622845011&ref=sr_nr_p_n_feature_eighteen_browse-bin_5'
driver.get(url)
wait = WebDriverWait(driver, 20)
m_list = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
print(m_list)
UPD2
But if you take the direct parent of that element instead, it will work correctly, since the parent is visible.
For the following code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=options)
url = 'https://www.amazon.com/s?i=stripbooks&bbn=1&rh=n%3A1%2Cp_n_feature_eighteen_browse-bin%3A8622846011&dc&fs=true&ds=v1%3AgAO0%2BZc%2BC6RthRkqyWzOHmDVufv7JbuCK96Grvjle68&qid=1665559431&rnid=8622845011&ref=sr_nr_p_n_feature_eighteen_browse-bin_5'
driver.get(url)
wait = WebDriverWait(driver, 20)
m_list = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]')))
print(len(m_list))
the output is:
27
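If you still want the explicit wait plus a safe fallback, one defensive pattern (a sketch, not the only option) is to bind the name before the try block and catch the timeout explicitly instead of using a bare except:

from selenium.common.exceptions import TimeoutException

y_priceNewProds = []  # bind the name up front so the loop can never hit UnboundLocalError
try:
    y_priceNewProds = wait.until(EC.visibility_of_all_elements_located((By.XPATH, '//div[contains(@class,"a-row a-size-base a-color-base")]//span[contains(@data-a-color,"base")]//span[contains(@class,"a-offscreen")][1]')))
except TimeoutException:
    y_priceNewProds = x_priceNewProds  # fall back to the find_elements result

Keep in mind that .text on a hidden element is usually an empty string; get_attribute('textContent') is the common way to read text out of off-screen nodes.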
I have the following problem. In the picture below I would like to fill some text into the second (red) field.
My code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
def set_scraper():
    """Function kills running applications and sets up the ChromeDriver."""
    options = webdriver.ChromeOptions()
    options.add_argument("--start-maximized")
    driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver", options=options)
    return driver

def main() -> None:
    """Main function that is called when the script is run."""
    driver = set_scraper()
    driver.get("https://nahlizenidokn.cuzk.cz/VyberBudovu/Stavba/InformaceO")
    pokus = driver.find_element(By.XPATH, '/html/body/form/div[5]/div/div/div/div[3]/div/fieldset/div[2]/div[2]/input[1]')
    driver.implicitly_wait(10)
    ActionChains(driver).move_to_element(pokus).send_keys("2727").perform()
The problem is that it sends "2727" into the first field, not into the red one, although /html/body/form/div[5]/div/div/div/div[3]/div/fieldset/div[2]/div[2]/input[1] is the full XPath of the second field. Do you know why, please?
You can use XPath to locate the parent element based on the unique text "Obec" in a child element, and then locate the proper input element inside that parent; this avoids the brittle absolute path.
Here I'm using fixed attribute values that do not seem to change.
The following code works:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://nahlizenidokn.cuzk.cz/VyberBudovu/Stavba/InformaceO"
driver.get(url)
wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='fieldsetWrapper'][contains(.,'Obec')]//input[@type='text']"))).send_keys("2727")
Try with the below:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//input[@title='Zadejte název obce']")))
element.send_keys("2727")
You can enter the text in the second text field using the below XPATH:
driver.find_element(By.XPATH, ".//input[@name='ctl00$bodyPlaceHolder$vyberObec$txtObec']").send_keys("2727")
# clicking on the button
driver.find_element(By.XPATH, ".//input[@title='Vyhledat obec']").click()
I am using Selenium WebDriver to collect the URLs of images from a website that is loaded with JavaScript. It appears as though my following code returns only about 160 of the roughly 240 links. Why might this be - because of the JavaScript rendering?
Is there a way to adjust my code to get around this?
driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)
driver.get('https://www.politicsanddesign.com/')
img_url = driver.find_elements_by_xpath("//div[@class='responsive-image-wrapper']/img")
img_url2 = []
for element in img_url:
    new_srcset = 'https:' + element.get_attribute("srcset").split(' 400w', 1)[0]
    img_url2.append(new_srcset)
You need to wait for all those elements to be loaded.
The recommended approach is to use WebDriverWait with expected_conditions explicit waits.
This code is giving me 760-880 elements in the img_url2 list:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument("start-maximized")
webdriver_service = Service(r'C:\webdrivers\chromedriver.exe')
driver = webdriver.Chrome(options=options, service=webdriver_service)
wait = WebDriverWait(driver, 10)
url = "https://www.politicsanddesign.com/"
driver.get(url) # once the browser opens, turn off the year filter and scroll all the way to the bottom as the page does not load all elements on rendering
wait.until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='responsive-image-wrapper']/img")))
# time.sleep(2)
img_url = driver.find_elements(By.XPATH, "//div[@class='responsive-image-wrapper']/img")
img_url2 = []
for element in img_url:
    new_srcset = 'https:' + element.get_attribute("srcset").split(' 400w', 1)[0]
    img_url2.append(new_srcset)
I'm not sure how stable this code is, so if needed you can enable the time.sleep delay between the wait line and the next line that grabs all those img_url elements.
EDIT:
Once the browser opens, you'll need to turn off the page's filter and then scroll all the way to the bottom of the page, as it does not automatically load all of the elements when it first renders, only once you've interacted with the page a little.
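If you'd rather automate that scroll than do it by hand, a rough scroll-to-bottom loop (assuming the page lazy-loads more images on scroll) could look like this:

import time

last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)  # give the lazy-loaded images time to render
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:  # no new content appeared, we are at the bottom
        break
    last_height = new_height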
I need to scrape the image src from this popup. I have coded that but am getting "AttributeError: 'NoneType' object has no attribute 'findElements'".
Here is the code.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from chromedriver_py import binary_path
import time
from time import sleep
url = 'https://www.macys.com/shop/product/black-tape-printed-contrast-trim-cardigan?ID=11398979&CategoryID=260&isDlp=true&swatchColor=Neutral%20Animal'
options = Options()
options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome(options=options, executable_path=binary_path)
wait = WebDriverWait(driver, 10)
driver.get(url)
sizechart_popup = wait.until(EC.element_to_be_clickable((By.XPATH, './/*[@class="sc-link"]'))).click()
sizechart = sizechart_popup.findElements(By.XPATH('.//*[@id="sizeImg"]/img')).get_attribute("src");
print(sizechart)
# Sleep of 50 seconds irrespective of whether the element is present or not
time.sleep(50)
# Free up the resources
driver.close()
Thanks in advance
Try using all the available element identifiers if one doesn't work; in this case it worked with css_selector.
sizechart = driver.find_element_by_css_selector('#sizeImg > img').get_attribute("src")
print(sizechart)
#Output:
https://assets.macysassets.com/dyn_img/size_charts/4011861.gif
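A side note: the find_element_by_css_selector style of call is deprecated and has been removed in current Selenium 4 releases, so the equivalent there would be (same selector, just the newer locator API):

from selenium.webdriver.common.by import By

sizechart = driver.find_element(By.CSS_SELECTOR, '#sizeImg > img').get_attribute("src")
print(sizechart)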
I am trying to get a list of the prices from this page.
The class name of the elements I'm trying to get is called s-item__price.
This is my code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone+8+&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1'
chrome_options = Options()
chrome_options.add_argument('--headless')
browser = webdriver.Chrome(options=chrome_options)
browser.get(url)
print(browser.find_elements_by_class_name('s-item__price'))
browser.quit()
The output is just an empty list.
You can use WebDriverWait to wait until the javascript generated the element:
wait = WebDriverWait(browser, 15) # 15 sec timeout
wait.until(expected_conditions.visibility_of_element_located((By.CLASS_NAME, 's-item__price')))
You could also use presence_of_all_elements_located, but when it comes to click interaction it won't work with hidden elements.
So prefer visibility_of_element_located.
Example Code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
url = 'https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone+8+&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1'
options = Options()
options.add_argument('--headless')
browser = webdriver.Chrome(options=options)
browser.get(url)
wait = WebDriverWait(browser, 15) # Throws a TimeoutException after 15 seconds
wait.until(expected_conditions.visibility_of_element_located((By.CLASS_NAME, 's-item__price')))
# you could also use the return value of the wait
print(browser.find_elements_by_class_name('s-item__price'))
browser.quit()
You get an empty list, I think, because you need to wait.
Use WebDriverWait and utilize presence_of_all_elements_located to collect the elements into a list.
Then extract them with a loop; use the .text attribute to grab the text.
browser.get('https://www.ebay.de/sch/i.html?_from=R40&_nkw=iphone%208%20&_sacat=0&LH_TitleDesc=0&LH_ItemCondition=3000&rt=nc&LH_Sold=1&LH_Complete=1')
wait = WebDriverWait(browser, 20)
list_price = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 's-item__price')))
for price in list_price:
    print(price.text)
browser.quit()
The following imports are needed:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
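If you want the prices in a list rather than printed, a one-line follow-up over the same wait result works:

prices = [price.text for price in list_price]  # plain list of the price strings
print(prices)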
I'm using Selenium with ChromeDriver to crawl images. To speed things up, I execute the JavaScript window.stop() to stop loading the whole page as soon as the elements of interest are loaded. I ran the same code several times but got different outcomes (sometimes a NoSuchElementException occurred, sometimes not).
Could anybody explain what's wrong with my code? Here is my sample code:
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
def get_driver(quick_load=True):
    chromeOptions = webdriver.ChromeOptions()
    # chromeOptions.add_argument('headless')
    prefs = {"profile.managed_default_content_settings.images": 2}
    chromeOptions.add_experimental_option("prefs", prefs)
    if quick_load:
        capa = DesiredCapabilities.CHROME
        capa["pageLoadStrategy"] = "none"
        driver = webdriver.Chrome(desired_capabilities=capa,
                                  chrome_options=chromeOptions)
    else:
        driver = webdriver.Chrome(chrome_options=chromeOptions)
    wait = WebDriverWait(driver, 20)
    return driver, wait
url = [
    "https://www.redbubble.com/people/captainkita/works/27361999-no-words?grid_pos=2&p=lightweight-hoodie&rbs=84d3442a-6f22-4dc1-980d-9a707104c791&ref=shop_grid",
    "https://www.redbubble.com/people/zeeteesapparel/works/27338911-game-of-thrones-waiting-for-a-bastard-with-direwolf?grid_pos=44&p=t-shirt&rbs=337dcf4c-3ff9-4507-9cbd-c6e80f78d022&ref=shop_grid&style=vneck",
    "https://www.redbubble.com/people/zeeteesapparel/works/27338911-game-of-thrones-waiting-for-a-bastard-with-direwolf?grid_pos=44&p=t-shirt&rbs=337dcf4c-3ff9-4507-9cbd-c6e80f78d022&ref=shop_grid&style=vneck"
]
driver, wait = get_driver()
for u in url:
    driver.execute_script("window.open('%s');" % u)
    driver.switch_to_window(driver.window_handles[-1])
    time.sleep(2)
    wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'input[type="image"]')))
    driver.execute_script("window.stop();")
    print("Stopped window")
    images = driver.find_elements_by_css_selector('input[type="image"]')
    print(len(images))
    candidates = [t for t in images if "750x1000" in t.get_attribute('src')]
    print(candidates[-1].get_attribute('src'))
    # driver.execute_script("window.close();")
    # driver.switch_to_window(driver.window_handles[-1])

driver.quit()
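A possible explanation for the flakiness (a guess, not a verified fix): with pageLoadStrategy set to "none", control returns before the page finishes loading, and presence_of_all_elements_located succeeds as soon as a single matching element exists, so window.stop() can fire while the remaining images are still being inserted into the DOM; whether the 750x1000 candidate is among them is then a matter of timing. A sketch of a stricter check that waits for the match count to stop growing before stopping the page (the helper name is made up):

def wait_for_stable_count(driver, css_selector, checks=3, pause=0.5, max_wait=20):
    # Poll until the number of matching elements is unchanged for
    # `checks` consecutive polls (or max_wait seconds pass), then return it.
    deadline = time.time() + max_wait
    last_count, stable_polls = -1, 0
    while stable_polls < checks and time.time() < deadline:
        count = len(driver.find_elements_by_css_selector(css_selector))
        stable_polls = stable_polls + 1 if count == last_count else 0
        last_count = count
        time.sleep(pause)
    return last_count

# usage, replacing the single presence wait before window.stop():
# wait_for_stable_count(driver, 'input[type="image"]')
# driver.execute_script("window.stop();")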