How to check current date and move to next date - python

I'm having a python issue which I cannot seem to understand. Not sure if I need to use if statements but because I'm new to python, I'm not actually sure how to code this little issue.
Virtually this is the issue I have. For the departure calendar, I want python to be able to do the following:
View 'Your date'. If there's a flight (doesn't matter if lowfare or normal), click it. If not then move onto the next available date that does have a flight and click that.
Will need to be able to move to the next month if no date is available in the current month (I have an example code for this).
For the return calendar, I want it to do the same thing but ensure it selects a date at least 7 days after the selected departure date.
That's virtually my question, how to do that?
Below is the HTML of the departure calendar (the return calendar is exactly the same, except that it is inboundsearchresults rather than outboundsearchresults):
Below I have a sample code which works when selecting from an ordinary date picker (this is used in the page before the url) if you want to use that template and manipulate it:
# Open the departure date picker.
datepicker = driver.find_element_by_id("departure-date-selector")
actions.move_to_element(datepicker).click().perform()

# Locate the calendar, its month/year drop-downs and the currently selected day.
calendar = driver.find_element_by_id("departureDateContainer")
month_picker = Select(calendar.find_element_by_class_name("ui-datepicker-month"))
year_picker = Select(calendar.find_element_by_class_name("ui-datepicker-year"))
current_date = calendar.find_element_by_class_name("ui-datepicker-current-day")

# Report the currently selected date.
month = month_picker.first_selected_option.text
year = year_picker.first_selected_option.text
print("Current departure date: {day} {month} {year}".format(day=current_date.text, month=month, year=year))

# First try to find a selectable day after the current one in the displayed month.
try:
    next_available_date = current_date.find_element_by_xpath("following::td[@data-handler='selectDay' and ancestor::div/@id='departureDateContainer']")
    print("Found an available departure date: {day} {month} {year}".format(day=next_available_date.text, month=month, year=year))
    next_available_date.click()
except NoSuchElementException:
    # Nothing left in this month: keep paging forward until a selectable day shows up.
    while True:
        # Click "next"; when there is no such button, bump the year drop-down instead.
        try:
            calendar.find_element_by_class_name("ui-datepicker-next").click()
        except NoSuchElementException:
            year_select = Select(calendar.find_element_by_class_name("ui-datepicker-year"))
            year_select.select_by_visible_text(str(int(year_select.first_selected_option.text) + 1))

        # Re-read the month and year now on display and report them.
        month = Select(calendar.find_element_by_class_name("ui-datepicker-month")).first_selected_option.text
        year = Select(calendar.find_element_by_class_name("ui-datepicker-year")).first_selected_option.text
        print("Processing {month} {year}".format(month=month, year=year))

        try:
            next_available_date = calendar.find_element_by_xpath(".//td[@data-handler='selectDay']")
            print("Found an available departure date: {day} {month} {year}".format(day=next_available_date.text, month=month, year=year))
            next_available_date.click()
            break
        except NoSuchElementException:
            continue

The idea is to define a reusable function, select_date(), that receives a "calendar" WebElement and an optional minimum date. The function first looks for "Your date" in the calendar; if it is there and is not earlier than the minimum date (when one is given), it clicks it and returns the date. If there is no usable "Your date", it looks for the available "flight" days, clicks the first one that is on or after the minimum date (or simply the first one when no minimum is given) and returns that date, moving on to the next month whenever the current one has no suitable day.
Working implementation:
from datetime import datetime, timedelta
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def select_date(calendar, mininum_date=None):
    """Click the first acceptable date in a flight calendar and return it.

    "Your Date" (class ``your-date``) is preferred.  When it is absent, or
    when it is earlier than ``mininum_date``, the first element with class
    ``flights`` that satisfies the minimum is used instead, paging through
    "Next month" until one is found.

    Args:
        calendar: element exposing the Selenium ``find_element_by_*`` API
            for one calendar (outbound or inbound).
        mininum_date: optional ``datetime``; dates earlier than this are
            rejected.  ``None`` accepts any date.

    Returns:
        The selected date as a ``datetime`` parsed from ``data-date``.

    Raises:
        NoSuchElementException: propagated from ``calendar`` when the month
            header or the "Next month" link cannot be found.
    """
    date_format = "%Y-%m-%d"

    # Prefer "Your Date" when the calendar offers one.
    try:
        your_date_elm = calendar.find_element_by_class_name("your-date")
    except NoSuchElementException:
        your_date_elm = None

    if your_date_elm is not None:
        your_date_text = your_date_elm.get_attribute("data-date")
        print("Found 'Your Date': " + your_date_text)
        your_date = datetime.strptime(your_date_text, date_format)
        # Validate BEFORE clicking so a too-early date is never selected.
        if not mininum_date or your_date >= mininum_date:
            your_date_elm.click()
            return your_date

    # Fall back to the first suitable flight day, month by month.
    while True:
        print("Processing " + calendar.find_element_by_css_selector("div.subheader > p").text)
        try:
            if mininum_date:
                flight_date_elm = next(
                    (
                        elm
                        for elm in calendar.find_elements_by_class_name("flights")
                        if datetime.strptime(elm.get_attribute("data-date"), date_format) >= mininum_date
                    ),
                    None,
                )
            else:
                flight_date_elm = calendar.find_element_by_class_name("flights")
        except NoSuchElementException:
            flight_date_elm = None

        if flight_date_elm is None:
            # Nothing usable in the displayed month: advance and retry.
            calendar.find_element_by_partial_link_text("Next month").click()
            continue

        flight_date = flight_date_elm.get_attribute("data-date")
        print("Found 'Flight Date': " + flight_date)
        flight_date_elm.click()
        return datetime.strptime(flight_date, date_format)
driver = webdriver.Firefox()
try:
    driver.get("http://www.jet2.com/cheap-flights/leeds-bradford/antalya/2016-03-01/2016-04-12?adults=2&children=2&infants=1&childages=4%2c6")
    wait = WebDriverWait(driver, 10)

    # Pick the outbound date once the outbound calendar is visible.
    outbound = wait.until(EC.visibility_of_element_located((By.ID, "outboundsearchresults")))
    outbound_date = select_date(outbound)

    # Pick the inbound date, at least seven days after the outbound one.
    inbound = driver.find_element_by_id("inboundsearchresults")
    inbound_minimum_date = outbound_date + timedelta(days=7)
    inbound_date = select_date(inbound, mininum_date=inbound_minimum_date)

    print(outbound_date, inbound_date)
finally:
    # Close the browser window even when a lookup or wait above fails.
    driver.close()
For the URL provided in the question, it prints:
Processing March 2016
Found 'Flight Date': 2016-03-28
Processing April 2016
Found 'Flight Date': 2016-04-04
2016-03-28 00:00:00 2016-04-04 00:00:00
The two dates printed at the end are the departure and the return dates.
Let me know if you need any clarifications and hope it helps.

Related

how to read user date selection off an existing calendar

I'm learning web scraping and I need the webdriver to wait until the user selects a start date and end date off an existing calendar from here and read it so I can process the availabilities in that given period. I hope somebody can help me!
here's the part of the code:
# Wait for the calendar grids, then walk the day cells looking for the start date.
tables = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//table[@role='grid']")))
table_first_month = tables[0].find_element(By.TAG_NAME, "tbody")
# NOTE(review): an XPath starting with "//" is evaluated against the whole
# document, not just table_first_month; use ".//td[...]" if only the first
# month is wanted - confirm the intent.
all_dates = table_first_month.find_elements(By.XPATH, "//td[@role='gridcell']")
for date in all_dates:
    date_span = date.find_element(By.TAG_NAME, "span")
    aria_label_span = date_span.get_attribute("aria-label")
    print(aria_label_span)
    #userStartDate = wait.until(EC.element_to_be_clickable((this is where i need help)))
    if aria_label_span == str(userStartDate):
        date_span.click()
        time.sleep(4)
        break
This code gets the available dates in the calendar for the two months shown and checks whether the given date (the one the user will select) exists, with the help of this function:
def press_right_arrow_until_date_is_found(date):
    """Advance the calendar until the formatted ``date`` appears in its text.

    Args:
        date: value passed to ``date_formater``; the formatted result is
            searched for in the text of the calendar container.
    """
    calendar_xpath = "/html[1]/body[1]/div[2]/div[1]/div[3]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/form[1]/div[1]/div[3]/div[4]/div[1]/div[1]"
    right_arrow_xpath = "//button[@class='fc63351294 a822bdf511 e3c025e003 fa565176a8 cfb238afa1 ae1678b153 c9fa5fc96d be298b15fa']"

    # Re-read the calendar text on every pass: each click changes what is shown.
    while date_formater(date) not in driver.find_element(By.XPATH, calendar_xpath).text:
        driver.find_element(By.XPATH, right_arrow_xpath).click()

Cleaner way to write repeated code in python?

Using Selenium I'm downloading some files from a webpage. On Mondays I need to download the info for Friday, Saturday, and Sunday. Every other day I only need yesterday's. I wrote an if/else statement to accomplish this and just copied and pasted the code into the else branch. There must be a more Pythonic way to write this, but I'm still new to this.
today = datetime.date.today()
yesterday = str(today - timedelta(days=1))
if today.weekday() == 0:
    # Monday: export Friday, Saturday and Sunday.
    fri = str(today - timedelta(days=3))
    sat = str(today - timedelta(days=2))
    weekend = [fri, sat, yesterday]
    for day in weekend:
        # Needs to go first otherwise page won't load
        date_field = driver.find_element_by_xpath(
            """//*[@id="id blah blah"]""")
        date_field.send_keys(day)
        org_list = driver.find_element_by_xpath(
            """//*[@id="id blah blah"]/option[text()=\"string\"]""").click()
        delay = 5
        try:
            table_chk = WebDriverWait(driver, delay).until(
                EC.presence_of_element_located((By.XPATH, """//*[@id="id blah blah"]""")))
            export_btn = driver.find_element_by_xpath(
                """//*[@id="id blah blah"]""")
            export_btn.click()
            # Reset the form so the next day starts from a clean state.
            date_field = driver.find_element_by_xpath(
                """//*[@id="id blah blah"]""")
            date_field.clear()
            org_list = driver.find_element_by_xpath(
                """//*[@id="id blah blah"]/option[1]""").click()
        except TimeoutException:
            print("Loading took too much time!")
        # NOTE(review): the original indentation was lost; this pause is
        # assumed to run after every day, timeout or not - confirm.
        time.sleep(2)
else:
    # Any other weekday: export yesterday only.
    # Needs to go first otherwise it doesn't work
    date_field = driver.find_element_by_xpath(
        """//*[@id="id blah blah"]""")
    date_field.send_keys(yesterday)
    org_list = driver.find_element_by_xpath(
        """//*[@id="id blah blah"]/option[text()=\"string\"]""").click()
    delay = 5
    try:
        table_chk = WebDriverWait(driver, delay).until(
            EC.presence_of_element_located((By.XPATH, """//*[@id="id blah blah"]""")))
        export_btn = driver.find_element_by_xpath(
            """//*[@id="id blah blah"]""")
        export_btn.click()
    except TimeoutException:
        print("Loading took too much time!")
How can I efficiently repeat the code but have it run multiple times on Monday for Fri, Sat, Sun and just once for the day before, every other day of the week?
Make it always loop, but programmatically define the collection to loop over as a single element most of the time, and multiple days when needed:
today = datetime.date.today()
# No need to define yesterday; we'll make it as needed next
if today.weekday() == 0:
    # Today is Monday, quickly get the days for Friday-Sunday
    days = [today - timedelta(days=i) for i in (3, 2, 1)]
else:
    # Today is not Monday, just check yesterday
    days = [today - timedelta(days=1)]
# days is now either one element list of just yesterday, or the whole weekend
# loop runs once or three times, as needed, with the same code
for day in days:
    # Complete body of original for day in weekend: loop goes here
    pass
If you really want to get code duplication to a minimum, you could reduce the code before the loop to:
today = datetime.date.today()
# Monday is weekday 0 and must reach back three days; any other day needs one.
num_days_to_check = 1 if today.weekday() != 0 else 3
# Offsets count down to 1, so the list runs from the oldest day to yesterday.
days = [today - timedelta(days=offset) for offset in range(num_days_to_check, 0, -1)]
since really, all that differs is how many prior days you need to check, 1 or 3, so the conditional can simplify to a one-liner choosing between the two, and the rest is just based on that initial decision point.

How to use selenium to handle and select elements from the popup/hidden form/table?

I would like to select a date from the popup date table/calendar on the website below using Selenium. I tried adding a double-click action, but it failed to select the date that I wanted.
from selenium.webdriver.common.action_chains import ActionChains
ccass = driver.get('http://www.hkexnews.hk/sdw/search/searchsdw_c.aspx')
ticker = '00001'
# NOTE(review): "#date-picker-popup" is not a valid XPath expression (it looks
# like a CSS id selector); left unchanged because the intended locator cannot
# be determined from here.
menu = driver.find_element_by_xpath("#date-picker-popup").click()
# NOTE(review): .click() returns None, so each double_click() below receives
# None, and the queued actions are never run with .perform() - confirm intent.
ccass_search_year = driver.find_element_by_xpath('//*[@id="date-picker"]/div[1]/b[1]/ul/li[2]/button').click()
actions.double_click(ccass_search_year)
ccass_search_month = driver.find_element_by_xpath('//*[@id="date-picker"]/div[1]/b[2]/ul/li[4]/button').click()
actions.double_click(ccass_search_month)
ccass_search_day = driver.find_element_by_xpath('//*[@id="date-picker"]/div[1]/b[3]/ul/li[4]/button').click()
actions.double_click(ccass_search_day)
ccass_search = driver.find_element_by_xpath('//*[@id="txtStockCode"]').send_keys(ticker) #Keys.ENTER)
ccass_search_click = driver.find_element_by_xpath('//*[@id="btnSearch"]').click()
The date you are trying to select is disabled. You can't select 3rd April 2018. You can only select from 10th April and I'm guessing that it will be disabled tomorrow.
Sorry to say, you are too late. Which the class name for the disabled dates also.
I edited it a little. The problem is that, although I found all those buttons in the popup date picker, I could not get the result for the date that I selected:
# XPath of each button in the popup date picker, keyed by the label to select.
year_list = {
    '2018': '//*[@id="date-picker"]/div[1]/b[1]/ul/li[1]/button',
    '2019': '//*[@id="date-picker"]/div[1]/b[1]/ul/li[2]/button',
}
# Months 1-12 map to list items 1-12 of the second column.
month_list = {
    str(n): '//*[@id="date-picker"]/div[1]/b[2]/ul/li[{}]/button'.format(n)
    for n in range(1, 13)
}
# Days 1-31 map to list items 1-31 of the third column (the leading space is
# kept from the original strings).
day_list = {
    str(n): ' //*[@id="date-picker"]/div[1]/b[3]/ul/li[{}]/button'.format(n)
    for n in range(1, 32)
}

# Look the target date up only after the tables above exist; doing it first
# raises NameError.
year = year_list['2019']
month = month_list['4']
day = day_list['4']

ccass = driver.get('http://www.hkexnews.hk/sdw/search/searchsdw_c.aspx')
popup_datepicker = driver.find_element_by_xpath('//*[@id="txtShareholdingDate"]').click()
# NOTE(review): .click() returns None, so each double_click() below receives
# None, and the queued actions are never run with .perform() - confirm intent.
ccass_search_year = driver.find_element_by_xpath(year).click()
actions.double_click(ccass_search_year)
ccass_search_month = driver.find_element_by_xpath(month).click()
actions.double_click(ccass_search_month)
ccass_search_day = driver.find_element_by_xpath(day).click()
actions.double_click(ccass_search_day)
ccass_search = driver.find_element_by_xpath('//*[@id="txtStockCode"]').send_keys(ticker) #Keys.ENTER)
ccass_search_click = driver.find_element_by_xpath('//*[@id="btnSearch"]').click()

Stop a loop if information is not found

I created a web scraping program that opens several URLs, checks which one of them has information for tomorrow's date, and then prints some specific information from that URL. My problem is that sometimes none of the URLs in the list has information for tomorrow. In that case I would like the program to print something else, such as "no data found". How could I accomplish that? Another doubt I have: do I need the while loop at the beginning? Thanks.
My code is:
from datetime import datetime, timedelta
# Tomorrow's date in the same dd-mm-YYYY form that the pages use.
tomorrow = datetime.now() + timedelta(days=1)
tomorrow = tomorrow.strftime('%d-%m-%Y')
day = ""
# The outer loop repeats the whole URL scan until some page matches tomorrow.
while day != tomorrow:
    for url in list_urls:
        browser.get(url)
        time.sleep(1)
        dia_page = browser.find_element_by_xpath("//*[@id='item2']/b").text
        # The date is the last ten characters of the heading text.
        dia_page = dia_page[-10:]
        day_uns = datetime.strptime(dia_page, "%d-%m-%Y")
        day = day_uns.strftime('%d-%m-%Y')
        if day == tomorrow:
            meals = browser.find_elements_by_xpath("//*[@id='item2']/span")
            meal_reg = browser.find_element_by_xpath("//*[@id='item_frm']/span[1]").text
            sopa2 = (meals[0].text)
            refeicao2 = (meals[1].text)
            sobremesa2 = (meals[2].text)
            print(meal_reg)
            print(sopa2)
            print(refeicao2)
            print(sobremesa2)
            break
No need for a while loop, you can use the for-else Python construct for this:
for url in list_urls:
    # do stuff
    if day == tomorrow:
        # do and print stuff
        break
else:  # break never encountered
    print("no data found")

Using pandas to scrape weather data from Wunderground

I came across a very useful set of scripts on Shane Lynn's site for the
analysis of weather data. The first script, used to scrape data from Weather Underground, is as follows:
import requests
import pandas as pd
from dateutil import parser, rrule
from datetime import datetime, time, date
import time
def getRainfallData(station, day, month, year):
    """
    Function to return a data frame of minute-level weather data for a single Wunderground PWS station.

    Args:
        station (string): Station code from the Wunderground website
        day (int): Day of month for which data is requested
        month (int): Month for which data is requested
        year (int): Year for which data is requested

    Returns:
        Pandas Dataframe with weather data for specified station and date,
        or None when the response could not be parsed.
    """
    url = "http://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID={station}&day={day}&month={month}&year={year}&graphspan=day&format=1"
    full_url = url.format(station=station, day=day, month=month, year=year)
    # Request data from wunderground data
    response = requests.get(full_url, headers={'User-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'})
    data = response.text
    # remove the excess <br> from the text data
    data = data.replace('<br>', '')
    # Convert to pandas dataframe (fails if issues with weather station)
    try:
        # NOTE(review): `io` is not among the imports shown with this script;
        # without it this line raises NameError, which the handler below hides.
        dataframe = pd.read_csv(io.StringIO(data), index_col=False)
        dataframe['station'] = station
    except Exception as e:
        print("Issue with date: {}-{}-{} for station {}".format(day,month,year, station))
        return None
    return dataframe
# Generate a list of all of the dates we want data for
start_date = "2016-08-01"
end_date = "2016-08-31"
start = parser.parse(start_date)
end = parser.parse(end_date)
dates = list(rrule.rrule(rrule.DAILY, dtstart=start, until=end))

# Create a list of stations here to download data for
stations = ["ILONDON28"]
# Set a backoff time in seconds if a request fails
backoff_time = 10
data = {}

# Gather data for each station in turn and save to CSV.
for station in stations:
    print("Working on {}".format(station))
    data[station] = []
    for date in dates:
        # Print period status update messages
        if date.day % 10 == 0:
            print("Working on date: {} for station {}".format(date, station))
        done = False
        while not done:
            try:
                weather_data = getRainfallData(station, date.day, date.month, date.year)
                done = True
            except (ConnectionError, requests.exceptions.ConnectionError):
                # requests raises its own ConnectionError, which is not a
                # subclass of the builtin one, so both are listed.
                # May get rate limited by Wunderground.com, backoff if so.
                print("Got connection error on {}".format(date))
                print("Will retry in {} seconds".format(backoff_time))
                # Wait for the interval that was just announced.
                time.sleep(backoff_time)
        # Add each processed date to the overall data
        data[station].append(weather_data)
    # Finally combine all of the individual days and output to CSV for analysis.
    pd.concat(data[station]).to_csv("data/{}_weather.csv".format(station))
However, I get the error:
Working on ILONDONL28
Issue with date: 1-8-2016 for station ILONDONL28
Issue with date: 2-8-2016 for station ILONDONL28
Issue with date: 3-8-2016 for station ILONDONL28
Issue with date: 4-8-2016 for station ILONDONL28
Issue with date: 5-8-2016 for station ILONDONL28
Issue with date: 6-8-2016 for station ILONDONL28
Can anyone help me with this error?
The data for the chosen station and the time period is available, as shown at this link.
The output you are getting is because an exception is being raised. If you added a print e you would see that this is because import io was missing from the top of the script. Secondly, the station name you gave was out by one character. Try the following:
import io
import requests
import pandas as pd
from dateutil import parser, rrule
from datetime import datetime, time, date
import time
def getRainfallData(station, day, month, year):
    """
    Function to return a data frame of minute-level weather data for a single Wunderground PWS station.

    Args:
        station (string): Station code from the Wunderground website
        day (int): Day of month for which data is requested
        month (int): Month for which data is requested
        year (int): Year for which data is requested

    Returns:
        Pandas Dataframe with weather data for specified station and date,
        or None when the response could not be parsed.
    """
    url = "http://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID={station}&day={day}&month={month}&year={year}&graphspan=day&format=1"
    full_url = url.format(station=station, day=day, month=month, year=year)
    # Request data from wunderground data
    response = requests.get(full_url)
    data = response.text
    # remove the excess <br> from the text data
    data = data.replace('<br>', '')
    # Convert to pandas dataframe (fails if issues with weather station)
    try:
        dataframe = pd.read_csv(io.StringIO(data), index_col=False)
        dataframe['station'] = station
    except Exception as e:
        print("Issue with date: {}-{}-{} for station {}".format(day,month,year, station))
        # Show the underlying cause so the failure can be diagnosed.
        print("Reason: {!r}".format(e))
        return None
    return dataframe
# Generate a list of all of the dates we want data for
start_date = "2016-08-01"
end_date = "2016-08-31"
start = parser.parse(start_date)
end = parser.parse(end_date)
dates = list(rrule.rrule(rrule.DAILY, dtstart=start, until=end))

# Create a list of stations here to download data for
stations = ["ILONDONL28"]
# Set a backoff time in seconds if a request fails
backoff_time = 10
data = {}

# Gather data for each station in turn and save to CSV.
for station in stations:
    print("Working on {}".format(station))
    data[station] = []
    for date in dates:
        # Print period status update messages
        if date.day % 10 == 0:
            print("Working on date: {} for station {}".format(date, station))
        done = False
        while not done:
            try:
                weather_data = getRainfallData(station, date.day, date.month, date.year)
                done = True
            except (ConnectionError, requests.exceptions.ConnectionError):
                # requests raises its own ConnectionError, which is not a
                # subclass of the builtin one, so both are listed.
                # May get rate limited by Wunderground.com, backoff if so.
                print("Got connection error on {}".format(date))
                print("Will retry in {} seconds".format(backoff_time))
                # Wait for the interval that was just announced.
                time.sleep(backoff_time)
        # Add each processed date to the overall data
        data[station].append(weather_data)
    # Finally combine all of the individual days and output to CSV for analysis.
    pd.concat(data[station]).to_csv(r"data/{}_weather.csv".format(station))
Giving you an output CSV file starting as follows:
,Time,TemperatureC,DewpointC,PressurehPa,WindDirection,WindDirectionDegrees,WindSpeedKMH,WindSpeedGustKMH,Humidity,HourlyPrecipMM,Conditions,Clouds,dailyrainMM,SoftwareType,DateUTC,station
0,2016-08-01 00:05:00,17.8,11.6,1017.5,ESE,120,0.0,0.0,67,0.0,,,0.0,WeatherCatV2.31B93,2016-07-31 23:05:00,ILONDONL28
1,2016-08-01 00:20:00,17.7,11.0,1017.5,SE,141,0.0,0.0,65,0.0,,,0.0,WeatherCatV2.31B93,2016-07-31 23:20:00,ILONDONL28
2,2016-08-01 00:35:00,17.5,10.8,1017.5,South,174,0.0,0.0,65,0.0,,,0.0,WeatherCatV2.31B93,2016-07-31 23:35:00,ILONDONL28
If you are not getting a CSV file, I suggest you add a full path to the output filename.

Categories