Problem with if-else conditions in Python selenium - python

Hey guys I have a problem with the if-else conditions.I was creating the following bot that searches and alerts me when theres a appointment available but I can't make the if-else conditons work at the final lines of the code,the bot doesn't respect the if-else conditions I've tried changing several times the code but no idea of how to resolve this problem.Thanks for the help.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from time import sleep
import time
import re
import os
import winsound
duration = 2000 # milliseconds
freq = 900 # Hz
lets_go = True
while lets_go == True:
browser = webdriver.Chrome()
browser.implicitly_wait(30)
browser.maximize_window()
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
browser.get("https://icp.administracionelectronica.gob.es/icpplus/index.html")
browser.verificationErrors = []
cookie_kill = browser.find_element_by_id("cookie_action_close_header")
cookie_kill.click()
#sleep(1)
madrid = browser.find_element_by_xpath('//*[#id="form"]/option[34]')
madrid.click()
#sleep(1)
accept = browser.find_element_by_id("btnAceptar")
accept.click()
#sleep(1)
browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
tramites_group = browser.find_element_by_xpath('/html/body/div[1]/div[2]/main/div/div/section/div[2]/form[1]/div[3]/div[1]/div[2]/div/fieldset/div[2]/select/option[3]')
tramites_group.click()
sleep(1)
aceptar = browser.find_element_by_id("btnAceptar")
aceptar.click()
sleep(1)
browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
enter_button = browser.find_element_by_id('btnEntrar')
enter_button.click()
sleep(1)
passport = browser.find_element_by_id("rdbTipoDocPas")
passport.click()
passport_number = browser.find_element_by_id("txtIdCitado").send_keys("123456789")
person_name = browser.find_element_by_id("txtDesCitado").send_keys("BORIS JOHNSON")
person_birth = browser.find_element_by_id("txtAnnoCitado").send_keys("1900")
nationality = browser.find_element_by_xpath('/html/body/div[1]/div[2]/main/div/div/section/div[2]/form/div/div/div[1]/div[5]/div/div/div/div/span/select/option[200]')
nationality.click()
browser.execute_script("window.scrollTo(0,document.body.scrollHeight)")
sleep(1)
enviar = browser.find_element_by_id("btnEnviar")
browser.execute_script("arguments[0].click();", enviar)
sleep(1)
enviar = browser.find_element_by_id("btnEnviar")
browser.execute_script("arguments[0].click();", enviar)
sleep(1)
no_appointments = browser.page_source.find("En este momento no hay citas disponibles.")
if no_appointments:
browser.close()
time.sleep(120)
else:
winsound.Beep(freq, duration)
print("found")
lets_go = False
break

page_source returns a normal Python string. The find method of a string does not return a boolean True/False. It returns the starting character number if found, and -1 if not found. Thus, you want:
no_appointments = browser.page_source.find("En este momento no hay citas disponibles.")
if no_appointments >= 0:
browser.close()
time.sleep(120)
You might consider whether it makes more sense to write:
if "En este momento no hay citas disponibles" in browser.page_source:

Related

Python WebScraping - Sleep oscillate in slow websites

I have a webscraping, but the site I'm using in some days is slow and sometimes not. Using the fixed SLEEP, it gives an error in a few days. How to fix this?
I use SLEEP in the intervals of the tasks that I have placed, because the site is sometimes slow and does not return the result giving me an error.
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox import options
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import Select
import pandas as pd
import json
from time import sleep
options = Options()
options.headless = True
navegador = webdriver.Firefox(options = options)
link = '****************************'
navegador.get(url = link)
sleep(1)
usuario = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_txtLogin')
usuario.send_keys('****************************')
sleep(1)
senha = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_txtSenha')
senha.send_keys('****************************')
sleep(2.5)
botaologin = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_btnEnviar')
botaologin.click()
sleep(40)
agendamento = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_TreeView2t8')
agendamento.click()
sleep(2)
selecdia = navegador.find_element(By.CSS_SELECTOR, "a[title='06 de dezembro']")
selecdia.click()
sleep(2)
selecterminal = navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_ddlVagasTerminalEmpresa')
selecterminal.click()
sleep(1)
select = Select(navegador.find_element(by=By.ID, value='ctl00_ctl00_Content_Content_ddlVagasTerminalEmpresa'))
select.select_by_index(1)
sleep(10)
buscalink = navegador.find_elements(by=By.XPATH, value='//*[#id="divScroll"]')
for element in buscalink:
teste3 = element.get_attribute('innerHTML')
soup = BeautifulSoup(teste3, "html.parser")
Vagas = soup.find_all(title="Vaga disponível.")
print(Vagas)
temp=[]
for i in Vagas:
on_click = i.get('onclick')
temp.append(on_click)
df = pd.DataFrame(temp)
df.to_csv('test.csv', mode='a', header=False, index=False)
It returns an error because the page does not load in time and it cannot get the data, but this time is variable
Instead of all these hardcoded sleeps you need to use WebDriverWait expected_conditions explicit waits.
With it you can set some timeout period so Selenium will poll the page periodically until the expected condition is fulfilled.
For example if you need to click a button you will wait for that element clickability. Once this condition is found Selenium will return you that element and you will be able to click it.
This will reduce all the redundant delays on the one hand and will keep waiting until the condition is matched on the other hand (until it is inside the defined timeout).
So, your code can be modified as following:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
#-----
wait = WebDriverWait(navegador, 30)
navegador.get(link)
wait.until(EC.element_to_be_clickable((By.ID, "ctl00_ctl00_Content_Content_txtLogin"))).send_keys('****************************')
wait.until(EC.element_to_be_clickable((By.ID, "ctl00_ctl00_Content_Content_txtSenha"))).send_keys('****************************')
wait.until(EC.element_to_be_clickable((By.ID, "ctl00_ctl00_Content_Content_btnEnviar"))).click()
wait.until(EC.element_to_be_clickable((By.ID, "ctl00_ctl00_Content_Content_TreeView2t8"))).click()
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a[title='06 de dezembro']"))).click()
etc.

I have problem to select the year (python - selenium - vsCode)

Im cannot select the year.
Run the code please an you will see what happens at the end.
I've tried many ways.
Cannot find the solution.
# -*- coding: utf-8 -*-
from time import time
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from datetime import datetime
from datetime import date
driver = webdriver.Chrome()
paginaHit = 'https://hit.com.do/solicitud-de-verificacion/'
driver.get(paginaHit)
driver.maximize_window()
embed = driver.find_element(By.CSS_SELECTOR, "embed")
driver.switch_to.frame(embed)
bl = 'SMLU7270944A'
clasificacion = 'Mudanzas'
wait =WebDriverWait(driver, 20)
wait.until(EC.visibility_of_element_located((By.ID, "billoflanding"))).send_keys(bl)
seleccion = Select(driver.find_element(By.ID, "cboClasificación"))
seleccion.select_by_visible_text(clasificacion)
btnBuscar = driver.find_element(By.XPATH, '/html/body/div/app-root/div/form/div/div[3]/div/button').click()
time.sleep(4)
consignatario = driver.find_element(By.ID, 'cosignatario').send_keys("LOGISTICA ADUANAL")
# seleccionMercancia = Select(driver.find_element(By.XPATH, '/html/body/div/app-root/div/div[2]/datos-generales/form/div/div[9]/div/select'))
# seleccionMercancia.select_by_visible_text("Articulos del hogar")
condicion = Select(driver.find_element(By.XPATH, '/html/body/div/app-root/div/div[2]/datos-generales/form/div/div[10]/div/select'))
condicion.select_by_visible_text("Verificación")
solicitante = driver.find_element(By.ID, "nombreVisitante").send_keys("JONATHAN MENDEZ GARCIA")
correo = driver.find_element(By.ID, "correo").send_keys("laduanal#gmail.com")
telefono = driver.find_element(By.ID, "telefono").send_keys("8098013610")
tipoDocumento = Select(driver.find_element(By.XPATH,'/html/body/div/app-root/div/div[2]/datos-generales/form/div/div[16]/div/select'))
tipoDocumento.select_by_visible_text("Cédula")
cedula = driver.find_element(By.ID, "cedulaVisitante2").send_keys("00111452470")
nombreYapellido = driver.find_element(By.ID, "text01").send_keys("JONATHAN MENDEZ GARCIA")
tipoDocumento2 = Select(driver.find_element(By.XPATH, '/html/body/div/app-root/div/visitante-form/form/div/div[3]/div/select'))
tipoDocumento2.select_by_visible_text("Cédula")
rolVisitante = Select(driver.find_element(By.XPATH, '/html/body/div/app-root/div/visitante-form/form/div/div[4]/div/select'))
rolVisitante.select_by_visible_text("Representante")
cedulaVisitante = driver.find_element(By.ID, "cedulaVisitante").send_keys("00111452470")
btnAgregarPersonal = driver.find_element(By.XPATH, '/html/body/div/app-root/div/visitante-form/form/div/div[7]/div/div[1]/button').click()
#SELECCIONAR FECHA DE VERFICACION
fechaDeseada = "0929 2022"
fechaVerificacion = driver.find_element(By.ID, "fechaVerificar")
fechaVerificacion.send_keys(fechaDeseada)
The problem is in the last 3 lines of the code, but you need to run it in order to see what happen. Will apreciate any help. Im trying to fill up a form because is a task that need to be done all day long.
For whatever reason, the date field on hit.com.do is buggy, so I had to enter the year first, then go back to entering the day & month. I'd replace your last three lines with:
fechaVerificacion = driver.find_element(By.ID, "fechaVerificar")
fechaVerificacion.send_keys(Keys.TAB, Keys.TAB, "2022", Keys.LEFT, Keys.LEFT, "2909")

Different length to place in a dataframe from scraping

for scraping reactions for a post in a page facebook, i can scrape all the informations (comments, reactions, tags,...) but when i want to put them in a dataframe, a have an error (arrays must all be same length) which is normal because sometime there's someone who put only a comment and an another one only a tag, so i have lists with differents length. i think i can put a conditionnal if but may there another optimized solution...
for example len(tag) =2, len(usr) = 17, len(commentaire)=12.
thanks :)
#imports here
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import requests
from bs4 import BeautifulSoup
import time
from time import sleep
from lxml import html
import logging as log
import pandas as pd
#chemin de chrome et desactivation des adds automatique de FB anti scrape
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications" : 2}
chrome_options.add_experimental_option("prefs",prefs)
driver = webdriver.Chrome('C:/Users/User/Downloads/chromedriver.exe',
chrome_options=chrome_options)
#open FB
driver.get("http://www.facebook.com")
print ("facebook page log ok")
sleep(1)
#reperage de user et pass (css_selector)
username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,
"input[name='email']")))
password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,
"input[name='pass']")))
##reperage de user et pass et click (xpath)
#username = driver.find_element(By.XPATH,"//input[contains(#id,'email')]")
#password = driver.find_element(By.XPATH,"//input[contains(#id,'pass')]")
usr=input('Enter Email Id:')
pwd=input('Enter Password:')
#enter données
username.clear()
username.send_keys(usr)
print ("Email Id entered")
sleep(1)
password.clear()
password.send_keys(pwd)
print ("Pass entered")
#reperage bouton log in et click
button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR,
"button[type='submit']"))).click()
print("login Successfully...")
time.sleep(5)
post = 'https://mbasic.facebook.com/AJSTunisie/posts/6452145678144265'
#open the webpage
driver.get(post)
page = requests.get(post)
df_comm = pd.DataFrame(columns = ['post_url', 'user', 'commentaire', 'tag', 'user_url'])
page_count = 0
while (True ):
#scrape les reactions
tree = html.fromstring(driver.page_source)
user = tree.xpath("//div[#class='eb']/div/h3/a/text()")
commentaire = tree.xpath("//div[#class='eb']/div/div[1]/text()")
tag = tree.xpath("//div[#class='eb']/div/div[1]/a/text()")
user_url = tree.xpath("//div[#class='eb']/div/h3/a/#href")
data= {'post_url':[post]*len(user), 'user':user, 'commentaire':commentaire, 'tag':tag,
'user_url':user_url}
df_comm = df_comm.append(pd.DataFrame(columns = df_comm.columns,data=data))
#Check if more reaction exist ("En afficher davantage" existe ou pas)
next_link = tree.xpath("//div[#class='eb eu']/a/#href")
if len(next_link)!= 0:
driver.find_element_by_xpath("//div[#class='eb eu']/a/#href").click()
page_count = page_count+1
else :
next_link = ''
break
df_comm =df_comm.reset_index()
#df_comm.to_csv(path,index=False)
driver.close()
You should do it in different way.
First you should find all comments - elements with text, user, tag, etc. - and next you should use for-loop to work with every comment separatelly. If loop you should use relavite xpath (starting at .) to get only information for this single comment. And then you can see if you have missing tag or other item and you can put some default value - i.e. empty string.
This way every comment will have all values so every row in CSV will have the same size.
This way also resolve other problem - in previous method you could get first comment with tag from second comment and you couldn't control it.
To make code simpler I put every comment on list of rows and later I convert all to DataFrame.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
import time
from lxml import html
import logging as log
import pandas as pd
#chemin de chrome et desactivation des adds automatique de FB anti scrape
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.default_content_setting_values.notifications" : 2}
chrome_options.add_experimental_option("prefs",prefs)
#driver = webdriver.Chrome('C:/Users/User/Downloads/chromedriver.exe', chrome_options=chrome_options)
driver = webdriver.Chrome(chrome_options=chrome_options)
#open FB
driver.get("http://www.facebook.com")
print ("facebook page log ok")
time.sleep(1)
#reperage de user et pass (css_selector)
username = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,
"input[name='email']")))
password = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR,
"input[name='pass']")))
##reperage de user et pass et click (xpath)
#username = driver.find_element(By.XPATH,"//input[contains(#id,'email')]")
#password = driver.find_element(By.XPATH,"//input[contains(#id,'pass')]")
usr = input('Enter Email Id:')
pwd = input('Enter Password:')
#enter données
username.clear()
username.send_keys(usr)
print ("Email Id entered")
#time.sleep(1)
password.clear()
password.send_keys(pwd)
print ("Pass entered")
#reperage bouton log in et click
button = WebDriverWait(driver, 2).until(EC.element_to_be_clickable((By.CSS_SELECTOR,
"button[type='submit']"))).click()
print("login Successfully...")
time.sleep(5)
post_url = 'https://mbasic.facebook.com/AJSTunisie/posts/6452145678144265'
#open the webpage
driver.get(post_url)
all_rows = []
page_count = 0
while True:
#scrape les reactions
page_count += 1
print('\n--- page:', page_count, '---\n')
tree = html.fromstring(driver.page_source)
# find all comments
all_comments = tree.xpath("//div[#class='ec']/div")
print('len(all_comments):', len(all_comments))
# work with every comment separatelly
for comment in all_comments:
user = comment.xpath(".//h3/a/text()") # relative xpath starting at `.`
print('user:', user)
user = user[0] if user else "" # set default value
print('user:', user)
commentaire = comment.xpath(".//div[1]/text()") # relative xpath starting at `.`
print('commentaire:', commentaire)
commentaire = commentaire[0] if commentaire else "" # set default value
print('commentaire:', commentaire)
tag = comment.xpath(".//div[1]/a/text()") # relative xpath starting at `.`
print('tag:', tag)
tag = tag[0] if tag else "" # set default value
print('tag:', tag)
user_url = comment.xpath(".//h3/a/#href") # relative xpath starting at `.`
print('user_url:', user_url)
user_url = user_url[0] if user_url else "" # set default value
print('user_url:', user_url)
all_rows.append([post_url, user, commentaire, tag, user_url])
#Check if more reaction exist ("En afficher davantage" existe ou pas)
next_link = driver.find_elements_by_xpath("//div[#class='ec es']/a")
print('---')
print('len(next_link):', len(next_link))
if next_link:
next_link[0].click()
time.sleep(2)
else:
break
# - after loop -
df = pd.DataFrame(all_rows, columns=['post_url', 'user', 'commentaire', 'tag', 'user_url'])
print(df)
df.to_csv('output.csv', index=False)
#driver.close()

Automation of stock updates

I'm building a bot to print prizes of bonds from hour to hour. However, I am incurring in two errors:
Since the task requires the browser to open from time to time, it ruins the experience while using the notebook. Is there a way to keep this task as a 'background' rule?
I am using schedule library to set the update, but I am not quite sure it is right (even though I read the manual). Or the time set is not respected (I set to 10 minutes and the code is read from 5 to 5) or the function time it is not updated (it repeats minutes/hours/seconds).
The code is below:
import sys
import os
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
import time
import datetime
import schedule
clock = datetime.datetime.now()
def preço():
os.chdir('C:/Users/Thiago/Desktop/Backup/Python')
options = webdriver.ChromeOptions()
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options)
driver.get("https://www.google.com/")
elem = driver.find_element_by_name("q")
elem.clear()
elem.send_keys("cvcb3")
time.sleep(1)
elem = driver.find_element_by_name("btnK")
elem.click()
time.sleep(2)
cvcb3 = driver.find_element_by_xpath(".//span[#jsname = 'vWLAgc']")
preço_cvcb3 = open('preço_cvcb3.txt', 'a')
preço_cvcb3.write('O preço da ação da CVC é ' + cvcb3.get_attribute("innerHTML") + ' - Extração feita ás ' + clock.strftime("%I:%M:%S %p") + '.\n')
preço_cvcb3.close()
print('O preço da ação da CVC é ' + cvcb3.get_attribute("innerHTML") + ' - Extração feita ás ' + clock.strftime("%I:%M:%S %p") + '.\n')
driver.close()
schedule.every(1).minutes.do(preço)
while True:
schedule.run_pending()
time.sleep(1)

Conditional dropdown for loop is not working in the expected way

I had posted in Stack Exchange earlier; however, did not get much response from that yet; hence, posting it here.
I am trying to scrape some data using the following code. When I run the code line by line, it works fine. However, when I want to run all code at one go, the dropdown options go blank and as a result, the last line returns error. Your help would be much appreciated. The code is below.
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
import os
path = os.path.join(r"D:\ScrapedData\TN\SocialAudit")
path_to_chromedriver = 'D:\ScrapedData/chromedriver'
options = webdriver.ChromeOptions()
prefs = {'download.default_directory' : path}
options.add_experimental_option('prefs', prefs)
browser = webdriver.Chrome(chrome_options=options ,executable_path=path_to_chromedriver)
url = "http://mnregaweb4.nic.in/netnrega/SocialAudit/StateList.aspx"
browser.get(url)
browser.set_page_load_timeout(45)
browser.maximize_window()
browser.find_element_by_link_text("BIHAR").click()
browser.implicitly_wait(5)
year=['2016-2017', '2017-2018', '2018-2019', '2019-2020']
elem2 = browser.find_element_by_name("ctl00$ContentPlaceHolder1$ddlFin")
elem2.send_keys(year[0])
browser.implicitly_wait(5)
select_dist = browser.find_element_by_name("ctl00$ContentPlaceHolder1$ddldist")
options = [x for x in select_dist.find_elements_by_tag_name("option")]
dist=[]
for e in range(len(options)):
select_dist = Select(browser.find_element_by_name("ctl00$ContentPlaceHolder1$ddldist"))
select_dist.select_by_index(e)
select_block = Select(browser.find_element_by_name("ctl00$ContentPlaceHolder1$ddlblock"))
options1 = select_block.options
for f in range(len(options1)):
select_block = Select(browser.find_element_by_name("ctl00$ContentPlaceHolder1$ddlblock"))
select_block.select_by_index(f)
select_gp = Select(browser.find_element_by_name("ctl00$ContentPlaceHolder1$ddlpanchayat"))
options2 = select_gp.options
for g in range(len(options2)):
select_gp = Select(browser.find_element_by_name("ctl00$ContentPlaceHolder1$ddlpanchayat"))
select_gp.select_by_index(g)
browser.find_element_by_css_selector("#ctl00_ContentPlaceHolder1_rbLoginLevel_1").click()
browser.implicitly_wait(10)
elem6 = browser.find_element_by_name("ctl00$ContentPlaceHolder1$txtperiodFrom")
elem6.send_keys('01/04/2016')
browser.implicitly_wait(10)
elem7 = browser.find_element_by_name("ctl00$ContentPlaceHolder1$txtperiodTo")
elem7.send_keys('31/03/2017')
browser.implicitly_wait(10)
browser.find_element_by_css_selector("#ctl00_ContentPlaceHolder1_login").click()
browser.implicitly_wait(10)
browser.find_element_by_link_text("Download All Reports").click()
Besides that the target page is slower than an aged snail, and those 10 second waits are barely enough for anything, there are two things you missed an those caused your troubles:
you did not take account that the first element of the select options are "select an option" types. So if you try to cycle trough all of them, you must ignore the option at the first index, else it will look like "nothing is selected"
wait for that spinner. After the spinner is gone, page will be refreshed. Do not grab the elements before page refresh is complete, wait until the spinner is gone.
With these two helper functions it is possible to press the "Get Reports" button without issues:
def is_spinner_gone(arg):
loaded_spinner = browser.find_element_by_xpath('//div[//div[#class="loader"]]')
if loaded_spinner:
return loaded_spinner.get_attribute('style') == 'display: none;'
return True
def wait_for_element(xpath):
# this is necessary because the spinner does not pop up instantly
time.sleep(1)
no_spinner = WebDriverWait(browser, 500).until(is_spinner_gone)
element = WebDriverWait(browser, 500).until(
EC.element_to_be_clickable((By.XPATH, xpath)))
return element
If you get your elements via the wait_for_element call then you'll be able to interact with them without error. I guess you know that pressing that button is not the end of the road yet, you'll have to choose the report format and who knows what later on.
Adjusted code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
import os
import time
path = os.path.join(r"D:\ScrapedData\TN\SocialAudit")
path_to_chromedriver = 'D:\ScrapedData/chromedriver'
options = webdriver.ChromeOptions()
prefs = {'download.default_directory' : path}
options.add_experimental_option('prefs', prefs)
browser = webdriver.Chrome(chrome_options=options ,executable_path=path_to_chromedriver)
start = time.time()
url = "http://mnregaweb4.nic.in/netnrega/SocialAudit/StateList.aspx"
browser.get(url)
browser.set_page_load_timeout(45)
browser.maximize_window()
loaded = time.time()
print(f'PAGE LOADED IN {loaded-start} seconds')
browser.find_element_by_link_text("BIHAR").click()
def is_spinner_gone(arg):
loaded_spinner = browser.find_element_by_xpath('//div[//div[#class="loader"]]')
if loaded_spinner:
return loaded_spinner.get_attribute('style') == 'display: none;'
return True
def wait_for_element(xpath):
# this is necessary because the spinner does not pop up instantly
time.sleep(1)
no_spinner = WebDriverWait(browser, 500).until(is_spinner_gone)
element = WebDriverWait(browser, 500).until(
EC.element_to_be_clickable((By.XPATH, xpath)))
return element
year=['2016-2017', '2017-2018', '2018-2019', '2019-2020']
elem2 = wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$ddlFin"]')
selector_page_loaded = time.time()
print(f'WORK AREA LOADED IN {selector_page_loaded-loaded} seconds')
elem2.send_keys(year[0])
select_dist = wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$ddldist"]')
options = [x for x in select_dist.find_elements_by_tag_name("option")]
dist=[]
# ISSUE: default fields are included in the options!
for e in range(1,len(options)):
select_dist = Select(wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$ddldist"]'))
select_dist.select_by_index(e)
select_block = Select(wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$ddlblock"]'))
options1 = select_block.options
for f in range(1, len(options1)):
select_block = Select(wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$ddlblock"]'))
select_block.select_by_index(f)
select_gp = Select(wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$ddlpanchayat"]'))
options2 = select_gp.options
for g in range(1, len(options2)):
select_gp = Select(wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$ddlpanchayat"]'))
select_gp.select_by_index(g)
wait_for_element('//*[#id="ctl00_ContentPlaceHolder1_rbLoginLevel_1"]').click()
elem6 = wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$txtperiodFrom"]')
elem6.send_keys('01/04/2016')
elem7 = wait_for_element('//*[#name="ctl00$ContentPlaceHolder1$txtperiodTo"]')
elem7.send_keys('31/03/2017')
wait_for_element('//*[#value="Get Reports"]').click()
print(f'FIRST RUN IN {time.time()-selector_page_loaded}')

Categories