Selenium/Python - Lag In Code?

I wrote some code to extract insider-trading information from the Toronto Stock Exchange. Using Selenium, it opens this link and then, for a list of stocks, inputs each one into the form, retrieves the data into another list, and moves on to the next stock.
Here is the code:
from selenium import webdriver

stocks = ['RKN', 'MG', 'GTE', 'IMO', 'REI.UN', 'RY']
dt = []
url = 'https://app.tmxmoney.com/research/insidertradesummaries?locale=EN'

driver = webdriver.Firefox()
driver.get(url)
search = driver.find_element_by_xpath('//ul[@class="nav nav-pills"]/li[3]')
search.click()
stock_form = driver.find_element_by_name('text')
for stock in stocks:
    stock_form.clear()
    stock_form.send_keys(stock)
    stock_form.submit()
    data = driver.find_element_by_xpath('//div[@class="insider-trades-symbol-search-container"]/div[@class="ng-binding"]')
    a = data.text.split('\n')
    if len(a) > 1:
        dt.append(a[-1].split())
    else:
        dt.append([])
driver.close()
If you run the code, you can see each stock being entered into the form; the data pops up and I attempt to retrieve it. However, when I get the text from "data", it's as if it were retrieved from what was visible on the page prior to submitting the form. I tried adding a wait to the code, to no avail.

Added a time.sleep(1) after submitting the form, and the code works as intended.
from selenium import webdriver
import time

stocks = ['RKN', 'MG', 'GTE', 'IMO', 'REI.UN', 'RY']
dt = []
url = 'https://app.tmxmoney.com/research/insidertradesummaries?locale=EN'

driver = webdriver.Firefox()
driver.get(url)
search = driver.find_element_by_xpath('//ul[@class="nav nav-pills"]/li[3]')
search.click()
stock_form = driver.find_element_by_name('text')
for stock in stocks:
    stock_form.clear()
    stock_form.send_keys(stock)
    stock_form.submit()
    time.sleep(1)  # give the results time to refresh before reading them
    data = driver.find_element_by_xpath('//div[@class="insider-trades-symbol-search-container"]/div[@class="ng-binding"]')
    a = data.text.split('\n')
    if len(a) > 1:
        dt.append(a[-1].split())
    else:
        dt.append([])
driver.close()
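A fixed sleep works, but an explicit wait is usually more reliable. Below is a minimal sketch of the same loop using WebDriverWait, under the assumption that the results text actually changes between symbols (the XPath is the one from the code above):

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait

stocks = ['RKN', 'MG', 'GTE', 'IMO', 'REI.UN', 'RY']
dt = []
results_xpath = ('//div[@class="insider-trades-symbol-search-container"]'
                 '/div[@class="ng-binding"]')

driver = webdriver.Firefox()
driver.get('https://app.tmxmoney.com/research/insidertradesummaries?locale=EN')
driver.find_element_by_xpath('//ul[@class="nav nav-pills"]/li[3]').click()
stock_form = driver.find_element_by_name('text')

for stock in stocks:
    old_text = driver.find_element_by_xpath(results_xpath).text
    stock_form.clear()
    stock_form.send_keys(stock)
    stock_form.submit()
    # Wait (up to 10 s) until the results text differs from what was shown before.
    WebDriverWait(driver, 10).until(
        lambda d: d.find_element_by_xpath(results_xpath).text != old_text)
    a = driver.find_element_by_xpath(results_xpath).text.split('\n')
    dt.append(a[-1].split() if len(a) > 1 else [])
driver.close()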

Related

Why does selenium display the same page even after going to next page?

I'm trying to scrape rental listing data on Zillow. Specifically, I want the link, price, and address of each property. However, after scraping the first page successfully and clicking the next arrow button, it just displays the same listings even though the page shows I'm on page 2, 3, etc. How do I get the next pages' listings? The project is supposed to use BeautifulSoup and Selenium, but after some research it looks like using only Selenium is the easiest way to do this since Zillow uses lazy-loading.
main.py code:
from time import sleep

from enter_data import DataEntry  # DataEntry is defined in enter_data.py below

DRIVER_PATH = r"D:\chromedriver.exe"
FORM_URL = "HIDDEN"
WEBPAGE = "https://www.zillow.com/toronto-on/rentals/?searchQueryState=%7B%22pagination%22%3A%7B%7D%2C%22mapBounds%22%3A%7B%22west%22%3A-79.40771727189582%2C%22east%22%3A-79.35750631913703%2C%22south%22%3A43.639155005365474%2C%22north%22%3A43.66405824004801%7D%2C%22mapZoom%22%3A15%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A792680%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22fore%22%3A%7B%22value%22%3Afalse%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22sort%22%3A%7B%22value%22%3A%22days%22%7D%2C%22auc%22%3A%7B%22value%22%3Afalse%7D%2C%22nc%22%3A%7B%22value%22%3Afalse%7D%2C%22fr%22%3A%7B%22value%22%3Atrue%7D%2C%22sf%22%3A%7B%22value%22%3Afalse%7D%2C%22tow%22%3A%7B%22value%22%3Afalse%7D%2C%22fsbo%22%3A%7B%22value%22%3Afalse%7D%2C%22cmsn%22%3A%7B%22value%22%3Afalse%7D%2C%22fsba%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%7D"

data_entry = DataEntry(DRIVER_PATH)
# Opens the webpage and gets the count of total pages (via self.next_btns_len).
data_entry.open_webpage(WEBPAGE)
# n is the iterator for the number of pages on the site.
n = 1
# Scrapes link, price, and address data, adds each to a class list, then goes to the next page.
while n < (data_entry.next_btns_len + 1):
    # Scrapes one page of data and adds it to the lists in the class object.
    data_entry.scrape_data()
    # Goes to the next page for scraping.
    sleep(5)
    data_entry.next_page()
    n += 1
enter_data.py code:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from time import sleep


class DataEntry:
    """Enters the data from soup into Google Form"""

    def __init__(self, driver_path):
        # Options keeps the browser open after execution.
        self.chrome_options = Options()
        self.chrome_options.add_experimental_option("detach", True)
        self.driver = webdriver.Chrome(executable_path=driver_path, chrome_options=self.chrome_options)
        self.links = []
        self.prices = []
        self.addresses = []
        self.next_btns_len = 0

    def open_webpage(self, webpage):
        # Opens the desired webpage and gives it two seconds to load.
        self.driver.get(webpage)
        sleep(2)
        # Gets the total page count for the main.py while loop.
        page_nums = self.driver.find_element(By.CSS_SELECTOR, '.Text-c11n-8-69-2__sc-aiai24-0.gCvDSp')
        self.next_btns_len = int(page_nums.text.split()[3])

    def scrape_data(self):
        # Scrolls to each listing to make it visible to Selenium.
        n = 1
        while n < 41:
            listing = self.driver.find_element(By.XPATH, f'/html/body/div[1]/div[5]/div/div/div/div[1]/ul/li[{n}]')
            self.driver.execute_script("arguments[0].scrollIntoView(true);", listing)
            print(n)
            n += 1
        # todo: Create a list of links for all the listings you scraped.
        links = self.driver.find_elements(By.CSS_SELECTOR, ".list-card-info .list-card-link")
        link_list = [link.get_attribute("href") for link in links]
        # The if statement checks whether the DOM class name has changed, which produces an empty list.
        # If the list is empty, the CSS selector is switched. The website alternates between two.
        if len(link_list) == 0:
            links = self.driver.find_elements(By.CSS_SELECTOR, ".StyledPropertyCardDataArea-c11n-8-69-2__sc-yipmu-0.dZxoFm.property-card-link")
            link_list = [link.get_attribute("href") for link in links]
        self.links.extend(link_list)
        print(len(self.links))
        print(self.links)
        # todo: Create a list of prices for all the listings you scraped.
        prices = self.driver.find_elements(By.CSS_SELECTOR, ".list-card-price")
        price_list = [price.text for price in prices]
        if len(price_list) == 0:
            prices = self.driver.find_elements(By.CSS_SELECTOR, ".StyledPropertyCardDataArea-c11n-8-69-2__sc-yipmu-0.kJFQQX")
            price_list = [price.text for price in prices]
        split_price_list = [price.split() for price in price_list]
        final_price_list = [price[0].strip("C+/mo") for price in split_price_list]
        self.prices.extend(final_price_list)
        print(len(self.prices))
        print(self.prices)
        # todo: Create a list of addresses for all the listings you scraped.
        addresses = self.driver.find_elements(By.CSS_SELECTOR, ".list-card-addr")
        address_list = [address.text for address in addresses]
        if len(address_list) == 0:
            addresses = self.driver.find_elements(By.CSS_SELECTOR, ".StyledPropertyCardDataArea-c11n-8-69-2__sc-yipmu-0.dZxoFm.property-card-link address")
            address_list = [address.text for address in addresses]
        self.addresses.extend(address_list)
        print(len(self.addresses))
        print(self.addresses)

    def next_page(self):
        # Clicks the next arrow and waits for the page to load.
        next_arrow = self.driver.find_element(By.XPATH, "//a[@title='Next page']")
        next_arrow.click()
        sleep(5)

    def close_webpage(self):
        self.driver.quit()

    def enter_data(self, form_url, address, rent, link):
        # Opens the Google Form and waits for it to load.
        self.driver.get(form_url)
        sleep(2)
        # Enters each address, rent, and link into the form, then clicks submit.
        address_input = self.driver.find_element(By.XPATH, '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[1]/div/div/div[2]/div/div[1]/div/div[1]/input')
        address_input.send_keys(address)
        rent_input = self.driver.find_element(By.XPATH, '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[2]/div/div/div[2]/div/div[1]/div/div[1]/input')
        rent_input.send_keys(rent)
        link_input = self.driver.find_element(By.XPATH, '//*[@id="mG61Hd"]/div[2]/div/div[2]/div[3]/div/div/div[2]/div/div[1]/div/div[1]/input')
        link_input.send_keys(link)
        submit_btn = self.driver.find_element(By.XPATH, '//*[@id="mG61Hd"]/div[2]/div/div[3]/div[1]/div[1]/div/span/span')
        submit_btn.click()
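One likely reason the same listings keep coming back is that next_page() only sleeps for a fixed time while Zillow re-renders the result list. A more robust variant keeps a handle on a listing from the current page and waits for it to go stale after clicking the arrow. A minimal sketch, assuming the listing elements are actually replaced (not merely updated in place) when the page changes; the listing XPath is the same one used in scrape_data:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def next_page(self):
    # Remember one listing from the current page, click "Next page",
    # then wait until that element is detached from the DOM.
    first_listing = self.driver.find_element(By.XPATH, '/html/body/div[1]/div[5]/div/div/div/div[1]/ul/li[1]')
    self.driver.find_element(By.XPATH, "//a[@title='Next page']").click()
    WebDriverWait(self.driver, 15).until(EC.staleness_of(first_listing))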
There is a less complex way to obtain the data you're looking for, using cloudscraper and pandas (and tqdm for convenience). You might also be in for a surprise, considering the time taken to get the data:
import cloudscraper
import pandas as pd
from tqdm import tqdm

scraper = cloudscraper.create_scraper()
df_list = []
for current_page in tqdm(range(1, 21)):
    url = f'https://www.zillow.com/search/GetSearchPageState.htm?searchQueryState=%7B%22pagination%22%3A%7B%22currentPage%22%3A{current_page}%7D%2C%22mapBounds%22%3A%7B%22west%22%3A-79.44174913987678%2C%22east%22%3A-79.32347445115607%2C%22south%22%3A43.57772225826024%2C%22north%22%3A43.7254027835563%7D%2C%22mapZoom%22%3A13%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A792680%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22isForSaleForeclosure%22%3A%7B%22value%22%3Afalse%7D%2C%22isAllHomes%22%3A%7B%22value%22%3Atrue%7D%2C%22sortSelection%22%3A%7B%22value%22%3A%22days%22%7D%2C%22isAuction%22%3A%7B%22value%22%3Afalse%7D%2C%22isNewConstruction%22%3A%7B%22value%22%3Afalse%7D%2C%22isForRent%22%3A%7B%22value%22%3Atrue%7D%2C%22isSingleFamily%22%3A%7B%22value%22%3Afalse%7D%2C%22isTownhouse%22%3A%7B%22value%22%3Afalse%7D%2C%22isForSaleByOwner%22%3A%7B%22value%22%3Afalse%7D%2C%22isComingSoon%22%3A%7B%22value%22%3Afalse%7D%2C%22isForSaleByAgent%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%7D&wants=%7B%22cat1%22:[%22listResults%22,%22mapResults%22]%7D&requestId=6'
    r = scraper.get(url)
    for x in r.json()['cat1']['searchResults']['listResults']:
        status = x['statusText']
        address = x['address']
        try:
            price = x['units'][0]['price']
        except Exception as e:
            price = x['price']
        if not 'https://www.' in x['detailUrl']:
            url = 'https://zillow.com' + x['detailUrl']
        else:
            url = x['detailUrl']
        df_list.append((address, price, url))
df = pd.DataFrame(df_list, columns=['Address', 'Price', 'Url'])
df.to_csv('renting_in_toronto.csv')
print(df)
This will save the data in a csv file, and print out:
100% 20/20 [00:16<00:00, 1.19it/s]
Address Price Url
0 2221 Yonge St, Toronto, ON C$1,900+ https://zillow.com/b/Toronto-ON/43.70606,-79.3...
1 10 Yonge St, Toronto, ON C$2,100+ https://zillow.com/b/10-yonge-st-toronto-on-BM...
2 924 Avenue Rd, Toronto, ON M5P 2K6 C$1,895/mo https://www.zillow.com/homedetails/924-Avenue-...
3 797 Don Mills Rd, Toronto, ON C$1,850+ https://zillow.com/b/Toronto-ON/43.71951,-79.3...
4 15 Queens Quay E, Toronto, ON C$2,700+ https://zillow.com/b/Toronto-ON/43.64202,-79.3...
... ... ...
You can install the packages with pip install cloudscraper and pip install tqdm. The URLs accessed are visible in Dev Tools under the Network tab; they return JSON data that JavaScript loads into the page.
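If you would rather not hard-code range(1, 21), one option is to keep requesting pages until the endpoint stops returning results. A minimal sketch, under the assumption that a page past the end simply returns an empty listResults list; the URL is the same one used above:

import cloudscraper

scraper = cloudscraper.create_scraper()
rows = []
current_page = 1
while True:
    url = f'https://www.zillow.com/search/GetSearchPageState.htm?searchQueryState=%7B%22pagination%22%3A%7B%22currentPage%22%3A{current_page}%7D%2C%22mapBounds%22%3A%7B%22west%22%3A-79.44174913987678%2C%22east%22%3A-79.32347445115607%2C%22south%22%3A43.57772225826024%2C%22north%22%3A43.7254027835563%7D%2C%22mapZoom%22%3A13%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A792680%2C%22regionType%22%3A6%7D%5D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22isForSaleForeclosure%22%3A%7B%22value%22%3Afalse%7D%2C%22isAllHomes%22%3A%7B%22value%22%3Atrue%7D%2C%22sortSelection%22%3A%7B%22value%22%3A%22days%22%7D%2C%22isAuction%22%3A%7B%22value%22%3Afalse%7D%2C%22isNewConstruction%22%3A%7B%22value%22%3Afalse%7D%2C%22isForRent%22%3A%7B%22value%22%3Atrue%7D%2C%22isSingleFamily%22%3A%7B%22value%22%3Afalse%7D%2C%22isTownhouse%22%3A%7B%22value%22%3Afalse%7D%2C%22isForSaleByOwner%22%3A%7B%22value%22%3Afalse%7D%2C%22isComingSoon%22%3A%7B%22value%22%3Afalse%7D%2C%22isForSaleByAgent%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%7D&wants=%7B%22cat1%22:[%22listResults%22,%22mapResults%22]%7D&requestId=6'
    r = scraper.get(url)
    results = r.json()['cat1']['searchResults']['listResults']
    if not results:
        break  # an empty page means we ran past the last page of listings
    for x in results:
        try:
            price = x['units'][0]['price']
        except (KeyError, IndexError):
            price = x['price']
        rows.append((x['address'], price, x['detailUrl']))
    current_page += 1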

Moving to the next page till the end with Selenium in Python

I want to retrieve data from the link below. The first page can be retrieved, but I have a problem writing the loop that goes to the next page until the end. Can you help me complete my code?
My link is:
https://www150.statcan.gc.ca/n1/pub/71-607-x/2021004/exp-eng.htm?r1=(1)&r2=0&r3=0&r4=12&r5=0&r7=0&r8=2022-02-01&r9=2022-02-01
from selenium import webdriver
import time

url = "https://www150.statcan.gc.ca/n1/pub/71-607-x/2021004/exp-eng.htm?r1=(1)&r2=0&r3=0&r4=12&r5=0&r7=0&r8=2022-02-01&r9=2022-02-01"
driver = webdriver.Chrome(r"C:\Program Files\Python310\chromedriver.exe")
driver.get(url)

table = driver.find_element_by_id('report_table')
body = table.find_element_by_tag_name('tbody')
cells = body.find_elements_by_tag_name('td')
for cell in cells:
    print(cell.text)
It brings in the first page's data, but I don't know how to retrieve the other pages.
Look for the next-page selector and iterate over it: if it is there, click it after your extraction part. You want to do that in a while loop, for example, which you break out of when the selector can't be found.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time

url = "https://www150.statcan.gc.ca/n1/pub/71-607-x/2021004/exp-eng.htm?r1=(1)&r2=0&r3=0&r4=12&r5=0&r7=0&r8=2022-02-01&r9=2022-02-01"
driver = webdriver.Chrome(r"C:\Program Files\Python310\chromedriver.exe")
driver.get(url)

table = driver.find_element_by_id('report_table')
body = table.find_element_by_tag_name('tbody')
cells = body.find_elements_by_tag_name('td')
for cell in cells:
    print(cell.text)

while True:
    # find_elements returns an empty list (instead of raising) when the link is missing
    next_page = driver.find_elements(By.XPATH, '//a[@id="report_results_next"]')
    if next_page:
        # steps to extract if there is a next page
        next_page[0].click()
        time.sleep(2)  # give the table time to reload
        table = driver.find_element_by_id('report_table')
        body = table.find_element_by_tag_name('tbody')
        cells = body.find_elements_by_tag_name('td')
        for cell in cells:
            print(cell.text)
    else:
        # no next-page link left: stop
        break
This is not tested.
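The fixed sleep after the click can also be replaced by an explicit wait for the old table body to go stale, which is usually more reliable. A minimal sketch of the loop under that approach, assuming the tbody element is replaced (not just updated in place) when the page changes; report_results_next is the same id used above:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

while True:
    next_page = driver.find_elements(By.XPATH, '//a[@id="report_results_next"]')
    if not next_page:
        break
    old_body = driver.find_element(By.CSS_SELECTOR, '#report_table tbody')
    next_page[0].click()
    # Wait until the old tbody has been replaced before reading the new cells.
    WebDriverWait(driver, 10).until(EC.staleness_of(old_body))
    for cell in driver.find_elements(By.CSS_SELECTOR, '#report_table tbody td'):
        print(cell.text)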

How do I web scrape using Python and BeautifulSoup?

I'm very new to this, but I have an idea for a website and I want to give it a good go. My aim is to scrape the Asda website for prices and products, more specifically whiskey in this case. I want to grab the name and price of all the whiskey on the Asda website and put it into a nice table on my website; however, I am having problems doing so. My code so far is getting a syntax error. Can anyone help?
The code so far is:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
driver = webdriver.Chrome()
driver.get('https://groceries.asda.com/shelf/drinks/spirits-ready-to-drink/spirits/whisky/1579926650')
res = driver.execute_script('return document.documentElement.outerHTML')
html_soup = BeautifulSoup(res, 'html.parser')
type(html_soup)
driver.quit
response = requests.get('https://groceries.asda.com/shelf/drinks/spirits-ready-to-drink/spirits/whisky/1579926650'
whiskey_container = html_soup.find('div', {'class': 'co-product-lazy-container'})
for whiskey in whiskey_container:
    name = whiskey.find('a', {'class': 'co-product__anchor'})
    price = whiskey.find('div', {'class': 'co-product__price'})
    print(name, price)
Try this:
# For waiting, better than time.sleep()
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
import time  # or WebDriverWait
import csv  # for saving data in a table


# save csv file
def save_csv(dct):
    '''
    dct - dictionary with our data:
        "cap",
        "title",
        "price"
    '''
    name = "file.csv"  # file name, you can choose whatever you want
    print("[INFO] saving...")  # to see that the function works
    with open(name, 'a', encoding="utf-8") as f:  # open the file for appending ("a")
        # this is needed for writing data to the table
        writer = csv.writer(f)
        writer.writerow((dct['cap'],
                         dct['title'],
                         dct['price'],
                         ))


def scroll(driver):
    # to load all the data we are interested in
    for i in range(1, 6):
        # driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        driver.execute_script("window.scrollTo(0, 1000)")
        time.sleep(7)


driver = webdriver.Firefox()
driver.get("https://groceries.asda.com/shelf/drinks/spirits-ready-to-drink/spirits/whisky/1579926650?facets=shelf%3A1579926650%3A0000&nutrition=&sortBy=&page=0")

for i in range(2):  # 2 because we only have two pages with data
    element = WebDriverWait(driver, 30)  # or time.sleep(30)
    scroll(driver)  # to load all the data we are interested in
    # get all product elements into one list
    data = driver.find_elements_by_css_selector(".co-lazy-product-container .co-item")
    # iterate over the data and build a dictionary for each item
    for d in data:
        items = {}
        body = d.text.split("\n")
        items["cap"] = body[0]
        items["title"] = body[1]
        items["price"] = body[-2]
        save_csv(items)
    # pagination
    driver.find_element_by_css_selector(".co-pagination__last-page").click()

# close driver
driver.quit()
You have a syntax error; a closing ")" is missing:
response = requests.get('https://groceries.asda.com/shelf/drinks/spirits-ready-to-drink/spirits/whisky/1579926650'
It should be:
response = requests.get('https://groceries.asda.com/shelf/drinks/spirits-ready-to-drink/spirits/whisky/1579926650')
--
By the way, your code won't work even with that fix; you have a couple of logical errors, and I doubt you can scrape that page with your current code.
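For what the question is actually after (Selenium to render, BeautifulSoup to parse), one possible shape is to hand the rendered page source to BeautifulSoup and select the product cards. A minimal sketch, assuming the class names from the original post (.co-item, .co-product__anchor, .co-product__price) are still the ones used on the page:

from bs4 import BeautifulSoup
from selenium import webdriver
import time

driver = webdriver.Chrome()
driver.get('https://groceries.asda.com/shelf/drinks/spirits-ready-to-drink/spirits/whisky/1579926650')
time.sleep(10)  # crude wait for the JS-rendered product grid to load

soup = BeautifulSoup(driver.page_source, 'html.parser')
driver.quit()

for product in soup.select('.co-item'):
    name = product.select_one('.co-product__anchor')
    price = product.select_one('.co-product__price')
    if name and price:  # skip cards that are missing either field
        print(name.get_text(strip=True), price.get_text(strip=True))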

How to extract data from a dropdown menu using python beautifulsoup

I am trying to scrape data from a website that has a multilevel drop-down menu; every time an item is selected, it changes the sub-items of the dependent drop-downs.
The problem is that on every loop iteration it extracts the same sub-items from the drop-downs. The selection happens, but the items are not updated to reflect the new selection from the loop.
Can anyone help me understand why I am not getting the desired results?
Perhaps this is because my drop-down list is generated by JavaScript or something.
This is how far I have gone:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
import csv
# from selenium.webdriver.support import Select
import time

print("opening chrome....")
driver = webdriver.Chrome()
driver.get('https://www.wheelmax.com/')
time.sleep(10)

csvData = ['Year', 'Make', 'Model', 'Body', 'Submodel', 'Size']

# variables
yeart = []
make = []
model = []
body = []
submodel = []
size = []
Yindex = Mkindex = Mdindex = Bdindex = Smindex = Sindex = 0

print("waiting for program to set variables....")
time.sleep(20)
print("initializing and setting variables....")

# initializing Year
Year = Select(driver.find_element_by_id("icm-years-select"))
Year.select_by_value('2020')
yr = driver.find_elements(By.XPATH, '//*[@id="icm-years-select"]')
time.sleep(15)

# initializing Make
Make = Select(driver.find_element_by_id("icm-makes-select"))
Make.select_by_index(1)
mk = driver.find_elements(By.XPATH, '//*[@id="icm-makes-select"]')
time.sleep(15)

# initializing Model
Model = Select(driver.find_element_by_id("icm-models-select"))
Model.select_by_index(1)
mdl = driver.find_elements(By.XPATH, '//*[@id="icm-models-select"]')
time.sleep(15)

# initializing body
Body = Select(driver.find_element_by_id("icm-drivebodies-select"))
Body.select_by_index(1)
bdy = driver.find_elements(By.XPATH, '//*[@id="icm-drivebodies-select"]')
time.sleep(15)

# initializing submodel
Submodel = Select(driver.find_element_by_id("icm-submodels-select"))
Submodel.select_by_index(1)
sbm = driver.find_elements(By.XPATH, '//*[@id="icm-submodels-select"]')
time.sleep(15)

# initializing size
Size = Select(driver.find_element_by_id("icm-sizes-select"))
Size.select_by_index(0)
siz = driver.find_elements(By.XPATH, '//*[@id="icm-sizes-select"]')
time.sleep(5)

Cyr = Cmk = Cmd = Cbd = Csmd = Csz = ""

print("fetching data from variables....")
for y in yr:
    obj1 = driver.find_element_by_id("icm-years-select")
    Year = Select(obj1)
    Year.select_by_index(++Yindex)
    obj1.click()
    # obj1.click()
    yeart.append(y.text)
    Cyr = y.text
    time.sleep(10)
    for m in mk:
        obj2 = driver.find_element_by_id("icm-makes-select")
        Make = Select(obj2)
        Make.select_by_index(++Mkindex)
        obj2.click()
        # obj2.click()
        make.append(m.text)
        Cmk = m.text
        time.sleep(10)
        for md in mdl:
            Mdindex = 0
            obj3 = driver.find_element_by_id("icm-models-select")
            Model = Select(obj3)
            Model.select_by_index(++Mdindex)
            obj3.click()
            # obj3.click(clickobj)
            model.append(md.text)
            Cmd = md.text
            time.sleep(10)
            Bdindex = 0
            for bd in bdy:
                obj4 = driver.find_element_by_id("icm-drivebodies-select")
                Body = Select(obj4)
                Body.select_by_index(++Bdindex)
                obj4.click()
                # obj4.click(clickobj2)
                body.append(bd.text)
                Cbd = bd.text
                time.sleep(10)
                Smindex = 0
                for sm in sbm:
                    obj5 = driver.find_element_by_id("icm-submodels-select")
                    Submodel = Select(obj5)
                    obj5.click()
                    Submodel.select_by_index(++Smindex)
                    # obj5.click(clickobj5)
                    submodel.append(sm.text)
                    Csmd = sm.text
                    time.sleep(10)
                    Sindex = 0
                    for sz in siz:
                        Size = Select(driver.find_element_by_id("icm-sizes-select"))
                        Size.select_by_index(++Sindex)
                        size.append(sz.text)
                        Scz = sz.text
                        csvData += [Cyr, Cmk, Cmd, Cbd, Csmd, Csz]
This happens because https://www.wheelmax.com has multilevel drop-down menus that depend on each other: for example, once you select an option in the Select Year drop-down, the Select Make drop-down is enabled and displays options based on the selected year.
So basically you need to use the Selenium package to handle the dynamic options.
Install the Selenium web driver for your browser.
Download the Chrome web driver:
http://chromedriver.chromium.org/downloads
Install the web driver for the Chrome browser:
unzip ~/Downloads/chromedriver_linux64.zip -d ~/Downloads
chmod +x ~/Downloads/chromedriver
sudo mv -f ~/Downloads/chromedriver /usr/local/share/chromedriver
sudo ln -s /usr/local/share/chromedriver /usr/local/bin/chromedriver
sudo ln -s /usr/local/share/chromedriver /usr/bin/chromedriver
Selenium tutorial:
https://selenium-python.readthedocs.io/
E.g. using Selenium to select multiple dependent drop-down options:
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time
driver = webdriver.Chrome()
driver.get('https://www.wheelmax.com/')
time.sleep(4)
selectYear = Select(driver.find_element_by_id("icm-years-select"))
selectYear.select_by_value('2019')
time.sleep(2)
selectMakes = Select(driver.find_element_by_id("icm-makes-select"))
selectMakes.select_by_value('58')
Update: to read the drop-down option values or count the total options:
for option in selectYear.options:
    print(option.text)

print(len(selectYear.options))
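The original loop collects the option elements once and then keeps reading them, which is why the same sub-items come back. One way around that is to iterate over the year values and re-locate the dependent drop-down after every selection. A minimal sketch, assuming the Make list counts as populated once it has more than one option:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get('https://www.wheelmax.com/')
wait = WebDriverWait(driver, 20)

# Wait for the Year drop-down to exist, then read its values (not the elements).
year_select = Select(wait.until(EC.presence_of_element_located((By.ID, 'icm-years-select'))))
year_values = [o.get_attribute('value') for o in year_select.options if o.get_attribute('value')]

for year in year_values:
    Select(driver.find_element(By.ID, 'icm-years-select')).select_by_value(year)
    # Re-locate the dependent Make drop-down after every selection and wait for it to repopulate.
    wait.until(lambda d: len(Select(d.find_element(By.ID, 'icm-makes-select')).options) > 1)
    make_select = Select(driver.find_element(By.ID, 'icm-makes-select'))
    print(year, [o.text for o in make_select.options])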
See more:
How to extract data from a dropdown menu using python beautifulsoup
The page does a callback to populate the years. Simply mimic that.
If you actually need to change years and select from dependent drop-downs, which becomes a different question, you need browser automation, e.g. Selenium, or to perform the steps manually and inspect the network tab to see if there is an XHR request you can mimic to submit your choices.
import requests

r = requests.get('https://www.iconfigurators.com/json2/?returnType=json&bypass=true&id=13898&callback=yearObj').json()
years = [item['year'] for item in r['years']]
print(years)
I guess the reason you can't parse the years with Beautiful Soup is that the 'select' tag containing the 'option' tags with all the years is not present yet, or is hidden, at the moment Beautiful Soup downloads the page. It is added to the DOM by executing additional JavaScript, I assume. If you look at the DOM of the loaded page using your browser's developer tools (for example F12 in Mozilla), you'll see that the tag containing the information you're looking for is <select id="icm-years-select">. If you try to parse for this tag with the object downloaded by Beautiful Soup, you get an empty list of tag objects:
from bs4 import BeautifulSoup
from requests import get

response = get('https://www.wheelmax.com/')
yourSoup = BeautifulSoup(response.text, "lxml")

print(len(yourSoup.select('div #vehicle-search')))  # length = 1 -> visible
print()
print(len(yourSoup.select('#icm-years-select')))  # length = 0 -> not visible
So if you want to get the years using Python by all means, I guess you might try to click on the respective tag and then parse again, using some combination of requests, Beautiful Soup, or the selenium module, which will require a bit more digging :-)
Otherwise, if you just quickly need the years parsed, use JavaScript:
countYears = document.getElementById('icm-years-select').length;
yearArray = [];
for (i = 0; i < countYears; i++) {yearArray.push(document.getElementById('icm-years-select')[i].value)};

How to automate a datepicker in Python Selenium?

I need to search for a flight with Python Selenium, but I haven't been able to select my desired date.
import time
import selenium
from selenium import webdriver

browser = webdriver.Chrome()
browser.get("https://www.spicejet.com/")
departureButton = browser.find_element_by_id("ctl00_mainContent_ddl_originStation1_CTXT")
departureButton.click()
browser.find_element_by_partial_link_text("Kolkata").click()
arivalButton = browser.find_element_by_id("ctl00_mainContent_ddl_destinationStation1_CTXT")
arivalButton.click()
browser.find_element_by_partial_link_text("Goa").click()
date_position = browser.find_element_by_id("ctl00_mainContent_view_date1")
date_position.click()

search_date = "10-September 2019"
dep_date = search_date.split("-")
dep_month = dep_date[1]
dep_day = dep_date[0]

cal_head = browser.find_elements_by_class_name("ui-datepicker-title")
for month_hd in cal_head:
    month_year = month_hd.text
    if dep_month == month_year:
        pass
    else:
        nxt = browser.find_element_by_class_name("ui-icon-circle-triangle-e").click()
        print(month_year)

time.sleep(2)
browser.close()
The problem with your code is that when you click the next button the DOM changes, and the element references saved in your variables are not updated. That is why it gives you a stale element reference exception. Instead of using variables, use the locator every time you access the calendar elements and it will work.
Try this:
import time
import selenium
from selenium import webdriver

browser = webdriver.Chrome()
browser.get("https://www.spicejet.com/")
departureButton = browser.find_element_by_id("ctl00_mainContent_ddl_originStation1_CTXT")
departureButton.click()
browser.find_element_by_partial_link_text("Kolkata").click()
arivalButton = browser.find_element_by_id("ctl00_mainContent_ddl_destinationStation1_CTXT")
arivalButton.click()
browser.find_element_by_partial_link_text("Goa").click()
date_position = browser.find_element_by_id("ctl00_mainContent_view_date1")
date_position.click()

search_date = "10-September 2019"
dep_date = search_date.split("-")
dep_month = dep_date[1]
dep_day = dep_date[0]

while browser.find_element_by_class_name("ui-datepicker-title").text != dep_month:
    browser.find_element_by_css_selector("a[data-handler='next']").click()

browser.find_element_by_xpath("//table//a[text()='" + dep_day + "']").click()
time.sleep(2)
browser.close()
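If the site is slow, clicking next in a tight loop can read the title before the calendar updates. A bounded variant that waits for the title to change after every click may be more robust. A minimal sketch that reuses browser, dep_month, and dep_day from the answer above; the 12-click cap is an arbitrary safety limit:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

wait = WebDriverWait(browser, 10)
for _ in range(12):  # never click "next" more than 12 times
    title = browser.find_element(By.CLASS_NAME, "ui-datepicker-title").text
    if title == dep_month:
        break
    browser.find_element(By.CSS_SELECTOR, "a[data-handler='next']").click()
    # Wait until the calendar header actually shows a different month.
    wait.until(lambda d: d.find_element(By.CLASS_NAME, "ui-datepicker-title").text != title)

browser.find_element(By.XPATH, "//table//a[text()='" + dep_day + "']").click()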
