I'm trying to send keys by passing a variable (variable name: NextDayFormatted). Because it is a string, and to avoid any future errors, I'm using the code below:
browser.find_element_by_css_selector("...").send_keys(NextDayFormatted + Keys.ENTER)
Whenever I run this single line of code independently, it works fine. But when I run the whole script, it fails: it passes the value that I have assigned to my variable (NextDayFormatted) but doesn't hit ENTER, so the next line of code fails.
PS: This issue persists with both ENTER and RETURN.
Can you please guide me on this?
Full code:
browser.get("url")
username = "abcd"
pwd = "def"
email = browser.find_element_by_name('email')
email.send_keys(username)
password = browser.find_element_by_name('password')
password.send_keys(pwd)
browser.find_element_by_tag_name('button').click()
#Click on filter icon
browser.find_element_by_xpath('....').click()
#Click Add filter
browser.find_element_by_xpath('....').click()
#Select the fields from the dropdown - Appointment Start Date & After
browser.find_element_by_xpath('....').click()
#The file needs to be extracted on every fridays therefore apply the appointment strt date logic in the search box.
Next_Date = datetime.datetime.today() + datetime.timedelta(days = 2)
Next_Date_Formatted = next_date.strftime('%m/%d/%Y')
#Applying the above variable in the search box
browser.find_element_by_css_selector('....').send_keys(Next_Date_Formatted + Keys.ENTER)`enter code here`
#Click on submit button
browser.find_element_by_css_selector('....').click()
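For what it's worth, a common workaround when ENTER gets swallowed during a full run is to wait until the field is actually interactable and send the keystroke as a separate call. A minimal sketch, assuming the same CSS selector as above ('....' stands in for the real locator):

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(browser, 10)
search_box = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '....')))
search_box.send_keys(Next_Date_Formatted)  # type the date first
search_box.send_keys(Keys.ENTER)           # then send ENTER as its own event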
I'm learning web scraping, and I need the webdriver to wait until the user selects a start date and an end date on an existing calendar (from here) and then read them, so I can process the availabilities in that period. I hope somebody can help me!
Here's the relevant part of the code:
tables = wait.until(EC.presence_of_all_elements_located((By.XPATH, "//table[@role='grid']")))
table_first_month = tables[0].find_element(By.TAG_NAME, "tbody")
all_dates = table_first_month.find_elements(By.XPATH, "//td[@role='gridcell']")
for date in all_dates:
    date_span = date.find_element(By.TAG_NAME, "span")
    aria_label_span = date_span.get_attribute("aria-label")
    print(aria_label_span)
    # userStartDate = wait.until(EC.element_to_be_clickable((this is where I need help)))
    if aria_label_span == str(userStartDate):
        date_span.click()
        time.sleep(4)
        break
This code gets the available dates in the calendar for the two months shown, and verifies that the given date (which the user will select) exists, with the help of this function:
def press_right_arrow_until_date_is_found(date):
    # get the text of the initial calendar
    current_calendar = driver.find_element(By.XPATH, "/html[1]/body[1]/div[2]/div[1]/div[3]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/form[1]/div[1]/div[3]/div[4]/div[1]/div[1]").text
    # while the date does not appear in the calendar view, press the right arrow until it does
    while date_formater(date) not in current_calendar:
        right_arrow = driver.find_element(By.XPATH,
            "//button[@class='fc63351294 a822bdf511 e3c025e003 fa565176a8 cfb238afa1 ae1678b153 c9fa5fc96d be298b15fa']")
        right_arrow.click()
        current_calendar = driver.find_element(By.XPATH,
            "/html[1]/body[1]/div[2]/div[1]/div[3]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/form[1]/div[1]/div[3]/div[4]/div[1]/div[1]").text
I am trying to make a Selenium Python script to collect data from each job in an Indeed job search. I can easily get the data from the first and second page. The problem I am running into is that, while looping through the pages, the script only clicks the next page and the previous page, in that order, going from page 1 -> 2 -> 1 -> 2 -> etc. I know it is doing this because both the next and previous buttons have the same class name. So when I redeclare the web element variable after the page loads, it hits the previous button, because that is the first element with that class in the DOM. I tried making it always click the next button by using the XPath (I would inspect the next button element and copy the full XPath), but I still run into the same errors. My code is below; I am using Python 3.7.9 and pip 21.2.4.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time

PATH = r"C:\Program Files (x86)\chromedriver.exe"
driver = webdriver.Chrome(PATH)
HTTPS = "https://"
# hard-coded data to test
siteDomain = "indeed.com"
jobSearch = "Software Developer"
locationSearch = "Richmond, VA"
listOfJobs = []

def if_exists_by_id(id):
    try:
        driver.find_element_by_id(id)
    except NoSuchElementException:
        return False
    return True

def if_exists_by_class_name(class_name):
    try:
        driver.find_element_by_class_name(class_name)
    except NoSuchElementException:
        return False
    return True

def if_exists_by_xpath(xpath):
    try:
        driver.find_element_by_xpath(xpath)
    except NoSuchElementException:
        return False
    return True

def removeSpaces(strArray):
    newjobCounter = 0
    jobCounter = 0
    for i, word in enumerate(strArray):
        jobCounter += 1
        if "\n" in strArray[i]:
            strArray[i] = strArray[i].replace("\n", " ")
        if "new" in strArray[i]:
            newjobCounter += 1
        print(strArray[i] + "\n")
    if newjobCounter == 0:
        print("Unfortunately, there are no new jobs for this search")
    else:
        print("With " + str(newjobCounter) + " out of " + str(jobCounter) + " new jobs!")
    return strArray

try:
    # Goes to site
    driver.get(HTTPS + siteDomain)
    # obtains access to elements from website
    searchJob = driver.find_element_by_name("q")
    searchLocation = driver.find_element_by_name("l")
    # clear text fields
    searchJob.send_keys(Keys.CONTROL, "a", Keys.BACK_SPACE)
    searchLocation.send_keys(Keys.CONTROL, "a", Keys.BACK_SPACE)
    # inputs values into website elements
    searchJob.send_keys(jobSearch)
    searchLocation.send_keys(locationSearch)
    # presses button to search
    searchLocation.send_keys(Keys.RETURN)
    # Begin looping through pages
    pageList = driver.find_element_by_class_name("pagination")
    page = pageList.find_elements_by_tag_name("li")
    numPages = 0
    for i, x in enumerate(page):
        time.sleep(1)
        # checks for popup; if there is a popup, close it and sleep
        if if_exists_by_id("popover-x"):
            driver.find_element_by_id("popover-x").click()
            time.sleep(1)
        # increment page counter variable
        numPages += 1
        # obtains data in class name value
        jobCards = driver.find_elements_by_class_name("jobCard_mainContent")
        # prints number of jobs returned
        print(str(len(jobCards)) + " jobs in: " + locationSearch)
        # inserts each job into list of jobs array
        # commented out to make debugging easier
        # for jobCard in jobCards:
        #     listOfJobs.append(jobCard.text)
        # supposed to click the next page, but keeps alternating
        # between next page and previous page
        driver.find_element_by_class_name("np").click()
        print("On page number: " + str(numPages))
    # print(removeSpaces(listOfJobs))
except ValueError:
    print(ValueError)
finally:
    driver.quit()
Any help will be greatly appreciated. Also, if I am implementing bad coding practices in the structure of the script, please let me know, as I am trying to learn as much as possible! :)
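One way to sidestep the ambiguity is a locator that names the next control directly. A minimal sketch, assuming the next-page link exposes aria-label="Next" (an assumption; Indeed's markup changes over time, so verify the attribute on the live page):

try:
    # aria-label="Next" is an assumed attribute; inspect the live page to confirm
    driver.find_element_by_xpath("//a[@aria-label='Next']").click()
except NoSuchElementException:
    print("No next-page link found; stopped on page " + str(numPages))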
I have tested your code. The thing is, there are 2 'np' class elements once you go to the 2nd page. What you can do is: the first time, use find_element_by_class_name('np'), and every other time use find_elements_by_class_name('np')[1], which selects the next button. (You can use find_elements_by_class_name('np')[0] for the previous button if needed.) Here is the code:
if i == 0:
    driver.find_element_by_class_name("np").click()
else:
    driver.find_elements_by_class_name("np")[1].click()
Just replace the line driver.find_element_by_class_name("np").click() with the code snippet above. I have tested it and it worked like a charm.
Also, I am not as experienced as the other devs here, but I am glad I could help. (This is my first answer ever on Stack Overflow.)
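In context, the relevant part of the page loop from the question would then read:

for i, x in enumerate(page):
    time.sleep(1)
    numPages += 1
    # ... scrape jobCards as before ...
    # page 1 renders one 'np' element ("next"); later pages render two,
    # where index 0 is "previous" and index 1 is "next"
    if i == 0:
        driver.find_element_by_class_name("np").click()
    else:
        driver.find_elements_by_class_name("np")[1].click()
    print("On page number: " + str(numPages))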
I am working on a project where I need to scrape data from a graph that shows the data for one day. For example: I want to get all the data for 2017, so I have to enter a new date in the datepicker 365 times. The problem is that, although I am very specific in my XPath call, the script finds way too many web elements, many of which don't even comply with the restrictions in my XPath. This only happens some way into the loop, and with every iteration the script finds more and more web elements.
The code that I am using:
import time
import pandas as pd
from selenium.webdriver.common.by import By

# web is the WebDriver instance
Date_vec = pd.date_range(start="2017-01-01", end="2021-2-28")
DatePicker = web.find_element_by_xpath('/html/body/form/table/tbody/tr/td/table/tbody/tr/td[2]/div/div[1]/div[2]/div[2]/div[2]/div/div[2]/div/table/tbody/tr[2]/td/table/tbody/tr/td[2]/span/input')
month_prev = 0
year_prev = 0
for i in Date_vec:
    DatePicker = web.find_element_by_xpath('/html/body/form/table/tbody/tr/td/table/tbody/tr/td[2]/div/div[1]/div[2]/div[2]/div[2]/div/div[2]/div/table/tbody/tr[2]/td/table/tbody/tr/td[2]/span/input')
    DatePicker.click()
    if i.year != year_prev:
        # Year_button = web.find_element(By.XPATH, "//span[@onclick = 'basicDatePicker.ehYearSelectorClick(this)']")
        Year_button = web.find_elements(By.XPATH, ".//span[@onclick = 'basicDatePicker.ehYearSelectorClick(this)']")
        Year_button[-1].click()
        Year_choice = web.find_elements(By.XPATH, "//a[normalize-space(text()) ='" + str(i.year) + "']")
        Year_choice[-1].click()
    elif i.month != month_prev:
        Month_button = web.find_elements(By.XPATH, "//span[@onclick = 'basicDatePicker.ehMonthSelectorClick(this)']")
        Month_button[-1].click()
        Month_choice = web.find_elements_by_class_name('bdpMonthItem')
        Month_choice[i.month-1].click()
    Day_button = web.find_elements(By.XPATH, "//a[normalize-space(text()) ='" + str(i.day) + "' and contains(@class, 'bdpDay')]")
    Day_button[-1].click()
    time.sleep(3)
    month_prev = i.month
    year_prev = i.year
For example, the problem arises at the line below:
Day_button = web.find_elements(By.XPATH, "//a[normalize-space(text()) ='" + str(i.day) + "' and contains(@class, 'bdpDay')]")
This line returns 4 elements, of which 2 don't have any text in them. I checked this with the following line:
test1 = [i.text for i in Day_button]
So my question basically is: why does this line of code return 4 elements, of which two don't have text, when I explicitly tell it to have the current day as text? Any help is appreciated.
Edit: for clarity, I added a snip from the datepicker in question:
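A plausible explanation, given that the element count grows with each iteration: some datepickers append a fresh calendar to the DOM every time the field is clicked and merely hide the old one. XPath's text() test matches the raw text node whether or not the element is rendered, while Selenium's .text property returns only visible text, which would explain matches that appear to have no text. A hedged filtering sketch:

# Keep only day links that are actually rendered and whose visible
# text is the day we want; hidden leftovers from earlier opens are skipped.
candidates = web.find_elements(By.XPATH, "//a[contains(@class, 'bdpDay')]")
visible_days = [el for el in candidates
                if el.is_displayed() and el.text.strip() == str(i.day)]
visible_days[-1].click()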
I am trying to open up several URLs (because they contain data I want to append to a list). I have logic saying "if amount in icl_dollar_amount_l", then run the rest of the code. However, I want the script to only run the rest of the code for the specific amount in the "amount" variable.
Example:
Selenium opens up X number of links and sees ['144,827.95', '5,199,024.87', '130,710.67'] in icl_dollar_amount_l, but I want it to skip '144,827.95' and '5,199,024.87' and only get the information for '130,710.67', which is already in the 'amount' variable.
Actual results:
It's getting web scraping information for amount '144,827.95' only, and not even going on to '5,199,024.87' and '130,710.67'. I only want it to get web scraping information for '130,710.67', because my amount variable has this as the only amount.
print(icl_dollar_amount_l)
['144,827.95', '5,199,024.87', '130,710.67']
print(amount)
'130,710.67'
file2.py
import re
import logging
from selenium import webdriver

# `options` is assumed to be a pre-built Chrome Options object
def scrapeBOAWebsite(url, fcg_subject_l, gp_subject_l):
    from ICL_Awk_Checker import rps_amount_l2
    icl_dollar_amount_l = []
    amount_ack_missing_l = []
    file_total_l = []
    body_l = []
    for link in url:
        print(link)
        browser = webdriver.Chrome(options=options,
                                   executable_path=r'\\TEST\user$\TEST\Documents\driver\chromedriver.exe')
        # if 'P2 Cust ID 908554 File' in fcg_subject:
        browser.get(link)
        username = browser.find_element_by_name("dialog:username").get_attribute('value')
        submit = browser.find_element_by_xpath("//*[@id='dialog:continueButton']").click()
        body = browser.find_element_by_xpath("//*[contains(text(), 'Total:')]").text
        body_l.append(body)
        icl_dollar_amount = re.findall(r'(?:[\£\$\€]{1}[,\d]+.?\d*)', body)[0].split('$', 1)[1]
        icl_dollar_amount_l.append(icl_dollar_amount)

    if not missing_amount:
        logging.info("List is empty")
        print("List is empty")

    count = 0
    for amount in missing_amount:
        if amount in icl_dollar_amount_l:
            body = body_l[count]
            get_file_total = re.findall(r'(?:[\£\$\€]{1}[,\d]+.?\d*)', body)[0].split('$', 1)[1]
            file_total_l.append(get_file_total)

    return icl_dollar_amount_l, file_date_l, company_id_l, client_id_l, customer_name_l, file_name_l, file_total_l, \
        item_count_l, file_status_l, amount_ack_missing_l
I don't know if I fully understand the problem, but this
if amount in icl_dollar_amount_l:
doesn't give you the position of '130,710.67' in icl_dollar_amount_l; you also need
count = icl_dollar_amount_l.index(amount)
for amount in missing_amount:
    if amount in icl_dollar_amount_l:
        count = icl_dollar_amount_l.index(amount)
        body = body_l[count]
But this only works if you expect a single matching amount in the list icl_dollar_amount_l. For more elements you would have to use a for loop and check every element separately:
for amount in missing_amount:
    for count, item in enumerate(icl_dollar_amount_l):
        if amount == item:
            body = body_l[count]
But frankly, I don't know why you don't check it in the first loop, for link in url:, where you have direct access to icl_dollar_amount and body.
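A sketch of that last idea, reusing only names from the question's snippet (missing_amount is assumed to be in scope inside the function): do the check while body is still at hand in the first loop, and record only what was requested.

for link in url:
    browser.get(link)
    body = browser.find_element_by_xpath("//*[contains(text(), 'Total:')]").text
    icl_dollar_amount = re.findall(r'(?:[\£\$\€]{1}[,\d]+.?\d*)', body)[0].split('$', 1)[1]
    if icl_dollar_amount in missing_amount:
        # keep only the bodies and amounts that were actually asked for
        body_l.append(body)
        icl_dollar_amount_l.append(icl_dollar_amount)
        file_total_l.append(icl_dollar_amount)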
I am trying to locate the elements that carry the contact numbers on each site. I was able to create the routine to get the numbers, extract the contact number with the available formats and regex, and use the following code snippet to get the element:
contact_elem = browser.find_elements_by_xpath("//*[contains(text(), '" + phone_num + "')]")
Considering the example of https://www.cssfirm.com/, the contact number appears in 2 locations: the top header and the bottom footer.
The element texts accompanying the contact number are as follows:
<h3>CALL US TODAY AT (855) 910-7824</h3> - Footer
<span>Call Us<br>Today</span> (855) 910-7824 - Header
The extracted phone number matches perfectly when printed. For some reason, though, the element from the header part is not being detected.
I tried searching for the elements, and even deleting the footer element from the browser before executing the rest of the code.
What could be the reason for it to go undetected?
P.S.: Below is the amateurish, uncorrected code; efficiency edits/suggestions are welcome. The same code has been tested with various sites and works fine.
import re
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException

url = 'http://www.cssfirm.com/'
browser.get(url)
parsed = browser.find_element_by_tag_name('html').get_attribute('innerHTML')
s = BeautifulSoup(parsed, 'html.parser')
s = s.decode('utf-8')
phoneNumberRegex = r'(\s*(?:\+?(\d{1,4}))?[-. (]*(\d{1,})[-. )]*(\d{3}|[A-Z0-9]+)[-. \/]*(\d{4}|[A-Z0-9]+)[-. \/]?(\d{4}|[A-Z0-9]+)?(?: *x(\d+))?\s*)'
custom_re = [r'([0-9]{4,4} )([0-9]{3,3} )([0-9]{4,4})',
             r'([0-9]{3,3} )([0-9]{4,4} )([0-9]{4,4})',
             r'(\+[0-9]{2,2}-)([0-9]{4,4}-)([0-9]{4,4}-)(0)',
             r'(\([0-9]{3,3}\) )([0-9]{3,3}-)([0-9]{4,4})',
             r'(\+[0-9]{2,2} )(\(0\)[0-9]{4,4} )([0-9]{4,6})',
             r'([0-9]{5,5} )([0-9]{6,6})',
             r'(\+[0-9]{2,2}\(0\))([0-9]{4,4} )([0-9]{4,4})',
             r'(\+[0-9]{2,2} )([0-9]{3,3} )([0-9]{4,4} )([0-9]{3,3})',
             r'([0-9]{3,3}-)([0-9]{3,3}-)([0-9]{4,4})']
phones = []
phones = re.findall(phoneNumberRegex, s)
phone_num_list = ()
phone_num = ''
matched = 0
for phoneHeader in phones:
    # phoneHeader = phoneHeader.decode('utf-8')
    for ph_cnd in phoneHeader:
        for pttrn in custom_re:
            phones = re.findall(pttrn, ph_cnd)
            if phones:
                phone_num_list = phones
                for x in phone_num_list:
                    phone_num = ''.join(x)
                    try:
                        contact_elem = browser.find_element_by_xpath("//*[contains(text(), '" + phone_num + "')]")
                        phone_num_txt = contact_elem.text
                        if phone_num_txt:
                            matched = 1
                            break
                    except NoSuchElementException:
                        pass
            if matched == 1:
                break
        if matched == 1:
            break
    if matched == 1:
        break
print("Phone number :", phone_num)   # <-- perfect output
contact_elem                         # <-- empty for header, or just the footer element
EDIT
Code updated; I had forgotten an important piece. Also, there is sleep time in between to give the page time to load; considering it trivial, I haven't included it, for a quick read.
I found a temporary solution by searching for the partial link text, as the number also appears in a link:
contact_elem2 = browser.find_element_by_partial_link_text(phone_num)
However, this does not answer the generic question as to why that text was ignored within the element.
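For the record, one XPath 1.0 detail can explain this: text() returns an element's direct text nodes as a node-set, and contains(text(), ...) only tests the first of them. In the header, the number follows the <span>, so the first text node of the enclosing element is not the one containing the number. Testing every text node avoids this; a sketch (same phone_num variable as above):

# contains(text(), ...) checks only the FIRST text node of an element;
# text()[contains(., ...)] checks each direct text node instead.
contact_elems = browser.find_elements_by_xpath(
    "//*[text()[contains(., '" + phone_num + "')]]")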