Unable to click on suggestion provided by textbox (Selenium) - python

When I send text manually, suggestion area provided by first textbox , it works fine. But when I send the text using selenium library it cannot able to select the option from suggestion area, although that option is present inside the textbox. Is there any one who can help me out of that.
import time
from selenium.webdriver.support.ui import Select
path=r"C:\Users\AbdulRehman\Downloads\chromedriver_win32\chromedriver.exe"
# driver = webdriver.Chrome(path)
options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
driver.get("https://www4.sii.cl/mapasui/internet/#/contenido/index.html")
try:
element = WebDriverWait(driver, 1000).until(
EC.presence_of_element_located((By.XPATH, '//*[#id="ng-app"]/body/div[5]/div/div/div[3]/div/button'))
)
element.click()
print("prints its working fine now ..")
element = WebDriverWait(driver, 1000).until(
EC.presence_of_element_located((By.XPATH,'//*[#id="titulo"]/div[8]/i'))
)
element.click()
# element = WebDriverWait(driver, 1000).until(
# EC.presence_of_element_located((By.XPATH,'//*[#id="rolsearch"]/div[2]/div[1]/input'))
# )
# element.send_keys("PEDRO AGUIRRE CERD"+Keys.ENTER)
# search = WebDriverWait(driver, 60).until(
# EC.visibility_of_element_located((By.XPATH, '//*[#id="rolsearch"]/div[2]/div[1]/input'))
# )
# search.send_keys("EL MONTE" + Keys.ENTER)
# time.sleep(3)
search = WebDriverWait(driver, 60).until(
EC.visibility_of_element_located((By.XPATH, '//*[#id="rolsearch"]/div[2]/div[1]/input'))
)
ActionChains(driver).click(on_element=search).send_keys("EL MONTE").send_keys(Keys.ENTER).perform()
suggestion = WebDriverWait(driver, 60).until(
EC.visibility_of_element_located((By.XPATH, '//strong[text()="EL MONTE"]'))
)
suggestion.click()
# auto_complete = driver.find_elements_by_xpath('//*[#id="rolsearch"]/div[2]/div[1]/input')
# auto_complete[0].click()
# auto_complete.send_keys(Keys.RETURN)
# element.send_keys("somehting in text")
# search = driver.find_element_by_xpath().click()
# search.send_keys(Keys.RETURN)
search_1 = driver.find_element_by_xpath('//*[#id="rolsearch"]/div[2]/div[2]/input')
search_1.send_keys("PEDRO AGUIRRE CERDA")
search_1.send_keys(Keys.RETURN)
search_2 = driver.find_element_by_xpath('//*[#id="rolsearch"]/div[2]/div[3]/input')
search_2.send_keys("somehting in text")
search_2.send_keys(Keys.RETURN)
print("Its also working now ......")
time.sleep(3)
except Exception as e:
print(e)
driver.quit()

The desired element is a Angular element, so to send a character sequence to the element you need to induce WebDriverWait for the element_to_be_clickable() and you can use the following Locator Strategy:
Using XPATH:
driver.get('https://www4.sii.cl/mapasui/internet/#/contenido/index.html')
WebDriverWait(driver, 60).until(EC.element_to_be_clickable((By.XPATH, "//button[text()='Aceptar']"))).click()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//i[#data-ng-click='mostrarBusquedaRol()']"))).click()
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[contains(#data-ng-include, '/mapasui/common/_content/busqueda-rol.html')]//div[#id='rolsearch']//label[contains(., 'Comuna')]//following::input[1]"))).send_keys("PEDRO AGUIRRE CERD" + Keys.ENTER)
Note: You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
Browser Snapshot:

Related

Python Selenium Football Odds Webscraping

I am trying to scrape odds from https://en.stoiximan.gr/live. While my code is working, I get an error for having uneven lists in my final dataframe. Unfortunately, stoiximan seems to place 3-way odds together with over/under odds and suspended/locked matches (as in the picture).
What I am trying to do is to delete both home and away teams from their respective lists if their odds are over/under or locked. Any suggestions?
Here 's my code so far:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time
import openpyxl
import os
#launch chrome and keep window open
chrome_options = Options()
chrome_options.add_experimental_option("detach", True)
driver = webdriver.Chrome(ChromeDriverManager().install(), options = chrome_options)
#visit en.stoiximan.gr and maximize window
driver.get("https://en.stoiximan.gr/live/")
driver.maximize_window()
#close modal window
try:
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((
By.XPATH, "//button[#class='sb-modal__close__btn uk-modal-close-default uk-icon uk-
close']"
))).click()
except:
pass
#accept cookies
WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((
By.ID, "onetrust-accept-btn-handler"
))).click()
#Initialize storage for stoiximan
stoiximan_home_teams_list = []
stoiximan_away_teams_list = []
stoiximan_home_odds_list = []
stoiximan_draw_odds_list = []
stoiximan_away_odds_list = []
#grab all home/away teams and explicit odds
try:
stoiximan_home_teams = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-
row__container--row']/div[1]/a/div[1]/div[1]/span"))
)
stoiximan_away_teams = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[1]/a/div[1]/div[2]/span"))
)
stoiximan_home_odds = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[1]/span[2]"))
)
stoiximan_draw_odds = WebDriverWait(driver, 10).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[2]/span[2]"))
)
stoiximan_away_odds = WebDriverWait(driver, 1).until(
EC.presence_of_all_elements_located((
By.XPATH,
"//div[#class='live-events-event-row__container live-event live-events-event-row__container--row']/div[2]/div/button[3]/span[2]"))
)
except:
driver.quit()
#loop each home team and append the lists
for stoiximan_home_team in stoiximan_home_teams:
stoiximan_home_teams_list.append(stoiximan_home_team.get_attribute('innerText'))
for stoiximan_away_team in stoiximan_away_teams:
stoiximan_away_teams_list.append(stoiximan_away_team.get_attribute('innerText'))
for stoiximan_home_odd in stoiximan_home_odds:
stoiximan_home_odds_list.append(stoiximan_home_odd.text)
for stoiximan_draw_odd in stoiximan_draw_odds:
stoiximan_draw_odds_list.append(stoiximan_draw_odd.text)
for stoiximan_away_odd in stoiximan_away_odds:
stoiximan_away_odds_list.append(stoiximan_away_odd.text)
print(stoiximan_home_teams_list)
print(len(stoiximan_home_teams_list))
print(stoiximan_away_teams_list)
print(len(stoiximan_away_teams_list))
print(stoiximan_home_odds_list)
print(len(stoiximan_home_odds_list))
print(stoiximan_draw_odds_list)
print(len(stoiximan_draw_odds_list))
print(stoiximan_away_odds_list)
print(len(stoiximan_away_odds_list))
#make str to float in odds lists
stoiximan_home_odds_list_float = [float(i) for i in stoiximan_home_odds_list]
stoiximan_draw_odds_list_float = [float(j) for j in stoiximan_draw_odds_list]
stoiximan_away_odds_list_float = [float(k) for k in stoiximan_away_odds_list]
#create dictionary for data
stoiximan_dict = {'Stoiximan Home Team': stoiximan_home_teams_list,
'Stoiximan Away Team': stoiximan_away_teams_list,
'Stoiximan Home Odd': stoiximan_home_odds_list_float,
'Stoiximan Draw Odd': stoiximan_draw_odds_list_float,
'Stoiximan Away Odd': stoiximan_away_odds_list_float
}
#create dataframe for data
df4 = pd.DataFrame(stoiximan_dict)
print(df4)
#write to excel file and open it
df4.to_excel(r'C:\Users\sweet_000\Desktop\data.xlsx', sheet_name="stoiximan", index=False)
os.system('start EXCEL.EXE "C:\\Users\\sweet_000\\Desktop\\data.xlsx"')
driver.quit()

selenium doesn't want the website to get to the 3rd page

Whenever I want selenium to press enter for me, it doesn't want to, get to the next page.
Is something wrong with the code?
from selenium import webdriver
from selenium.webdriver.common import keys
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time
PATH = "C:\Pro\chromedriver.exe"
driver = webdriver.Chrome(PATH)
driver.get("https://insurify.com")
try:
search = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.ID, "zipcodeInput"))
)
search.send_keys('34997')
search.send_keys(Keys.RETURN)
element1 = WebDriverWait(driver, 30).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span > input:nth-child(2)"))
)
element1.send_keys("2016")
element1.send_keys(Keys.RETURN)
time.sleep(30)
element2 = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span.twitter-typeahead > input:nth-child(2)"))
)
element2.send_keys('BMW')
element2.send_keys(Keys.RETURN)
element3 = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span.twitter-typeahead > input:nth-child(2)"))
)
element3.send_keys('4-Series')
element3.send_keys(Keys.RETURN)
element4 = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span.twitter-typeahead > input:nth-child(2)"))
)
element4.send_keys('428i')
element4.send_keys(Keys.RETURN)
time.sleep(50)
except:
driver.quit
Also there's a picture for the last code execution of the code.
By running driver.implicitly_wait(30) right after the definition of driver, we can get rid of all the commands WebDriverWait(driver, 30).until(EC.presence_of_element_located((...))). Moreover, with a proper use of find_element() and click() we can replace the blocks of code such as
element1 = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#typeahead-input > div > span > input:nth-child(2)")))
element1.send_keys("2016")
element1.send_keys(Keys.RETURN)
with a one line command. The final code is
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
driver = webdriver.Chrome(service=Service(your_chromedriver_path))
driver.implicitly_wait(30)
driver.get("https://insurify.com")
driver.find_element(By.CSS_SELECTOR, '#zipcodeInput').send_keys('34997')
driver.find_element(By.XPATH, '//button[text()="View my quotes"]').click()
driver.find_element(By.XPATH, '//div[text() = "2016"]').click()
driver.find_element(By.XPATH, '//span[text()="BMW"]').click()
driver.find_element(By.XPATH, '//div[text()="4-Series"]').click()
driver.find_element(By.XPATH, '//div[text() = "428i"]').click()

Scraping Notion

After the bot clicks on the first book,it should wait until all quotes are visible; however, it doesn't. It only gets 4 quotes out of 25. It can get the 25 quotes if I time.sleep(10) but this is not efficient. How can I solve this?
class Scraping:
def __init__(self):
pass
def openWebDriver(self):
chromeOptions = Options()
self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chromeOptions)
def fetchNotion(self):
self.driver.get('https://sumptuous-salesman-ca6.notion.site/bf68366a212e45e1ae9bee853867c225?v=85ee9cedeb5a44c994e49033053f593b')
def getBooks(self):
books = WebDriverWait(self.driver, 10).until(
EC.presence_of_all_elements_located((By.XPATH, '//div[#class="notion-selectable notion-page-block notion-collection-item"]')))
books[0].click()
def getQuotes(self):
quotesElements = WebDriverWait(self.driver, 10).until(
EC.presence_of_all_elements_located((By.XPATH, '//div[#placeholder="Empty quote"]')))
quotes = []
for quoteElement in quotesElements:
quotes.append(quoteElement.text)
print(len(quotesElements))
return quotes
scraping = Scraping()
scraping.openWebDriver()
scraping.fetchNotion()
scraping.getBooks()
scraping.getQuotes()
You were close enough. Instead of presence_of_all_elements_located() you have to induce WebDriverWait for the visibility_of_all_elements_located() and you can use either of the following ocator strategies:
Using CSS_SELECTOR:
def getQuotes(self):
print([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div[placeholder='Empty quote']")))])
Using CSS_SELECTOR in a single line:
def getQuotes(self):
print([my_elem.get_attribute("innerText") for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//div[#placeholder='Empty quote']")))])
Note : You have to add the following imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

Attempt to print all records in a table only get's the first 16 rows

When I run the below Python program the statement
print([
my_elem.text for my_elem in
WebDriverWait(driver, 20)
.until(EC.visibility_of_all_elements_located(
(By.XPATH,
"/html/body/div[1]/ui-view/div/div[1]"
"/div/div/div/div/exploration-container/exploration-container-modern"
"/div/div/exploration-host/div/div/exploration/div/explore-canvas-modern"
"/div/div[2]/div/div[2]/div[2]/visual-container-repeat/visual-container-modern[9]/transform"
"/div/div[3]/detail-visual-modern/div/visual-modern/div/div/div[2]/div[1]"
)
))
])
should print all records in the table but it only prints the first 16 rows plus 4 rows from the second column. How can I get all rows printed?
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Firefox()
driver.get("https://app.powerbi.com/view?r=eyJrIjoiZGYxNjYzNmUtOTlmZS00ODAxLWE1YTEtMjA0NjZhMzlmN2JmIiwidCI6IjljOWEzMGRlLWQ4ZDctNGFhNC05NjAwLTRiZTc2MjVmZjZjNSIsImMiOjh9")
for i in range(1, 4):
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//span[#class='navigation-wrapper navigation-wrapper-big']//i[#title='Next Page']"))).click()
action = ActionChains(driver)
action.move_to_element(driver.find_element_by_xpath("/html/body/div[1]/ui-view/div/div[1]/div/div/div/div/exploration-container/exploration-container-modern/div/div/exploration-host/div/div/exploration/div/explore-canvas-modern/div/div[2]/div/div[2]/div[2]/visual-container-repeat/visual-container-modern[9]/transform")).perform()
action.context_click().perform()
element = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//div[#title='Show as a table']")))
action.move_to_element(element).click().perform()
print([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "/html/body/div[1]/ui-view/div/div[1]/div/div/div/div/exploration-container/exploration-container-modern/div/div/exploration-host/div/div/exploration/div/explore-canvas-modern/div/div[2]/div/div[2]/div[2]/visual-container-repeat/visual-container-modern[9]/transform/div/div[3]/detail-visual-modern/div/visual-modern/div/div/div[2]/div[1]")))])
driver.quit()
Additional info
I managed to get this working although it feels a little hacky. Here's my latest code. You'll see there's a second ActionChain:
driver = webdriver.Firefox()
driver.get("https://app.powerbi.com/view?r=eyJrIjoiZGYxNjYzNmUtOTlmZS00ODAxLWE1YTEtMjA0NjZhMzlmN2JmIiwidCI6IjljOWEzMGRlLWQ4ZDctNGFhNC05NjAwLTRiZTc2MjVmZjZjNSIsImMiOjh9")
for i in range(1, 4):
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//i[#title='Next Page']"))).click()
action = ActionChains(driver)
action.move_to_element(driver.find_element_by_xpath("/html/body/div[1]/ui-view/div/div[1]/div/div/div/div/exploration-container/exploration-container-modern/div/div/exploration-host/div/div/exploration/div/explore-canvas-modern/div/div[2]/div/div[2]/div[2]/visual-container-repeat/visual-container-modern[9]/transform")).perform()
action.context_click().perform()
element = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//div[#title='Show as a table']")))
action.move_to_element(element).click().perform()
action2 = ActionChains(driver)
scroll_element = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "/html/body/div[1]/ui-view/div/div[1]/div/div/div/div/exploration-container/exploration-container-modern/div/div/exploration-host/div/div/exploration/div/explore-canvas-modern/div/div[2]/div/div[2]/div[2]/visual-container-repeat/visual-container-modern[9]/transform/div/div[3]/detail-visual-modern/div/visual-modern/div")))
action2.move_to_element(scroll_element).click()
action2.send_keys(Keys.UP)
action2.send_keys(Keys.UP)
action2.send_keys(Keys.UP).perform()
print([my_elem.text for my_elem in WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "/html/body/div[1]/ui-view/div/div[1]/div/div/div/div/exploration-container/exploration-container-modern/div/div/exploration-host/div/div/exploration/div/explore-canvas-modern/div/div[2]/div/div[2]/div[2]/visual-container-repeat/visual-container-modern[9]/transform/div/div[3]/detail-visual-modern/div/visual-modern/div/div/div[2]/div[1]")))])
Can this be done with just one ActionChain and if so how?

How can I get data from a online pdf file using python?

I need to get the data value of the amount of money in a online pdf file. I need the number R$ 20.707,00 which is the total of "serviço"
I am using selenium to reach this PDF:
import os
import time
import xlrd # importando a biblioteca
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import zipfile
inicio = time.time()
datainicial = "01042019"
datafinal = '30042019'
cnpj = '13177807000146'
senha = 'qualita#2018'
driver = webdriver.Chrome()
# driver.maximize_window()
driver.get("https://directa.natal.rn.gov.br/")
# Logando
driver.switch_to.frame(driver.find_element_by_name("mainsystem"))
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "usuario"))).send_keys(cnpj)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, "senha"))).send_keys(str(senha))
time.sleep(2)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "button.btn#acessar"))).click()
# Nota natalense
WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.NAME, "mainsystem")))
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'limenu9'))).click()
time.sleep(1)
# Consulta
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#limenu9 > div > a:nth-child(1)"))).click()
# Consulta NFe
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#formsmenu12 > li:nth-child(4) > a'))).click()
# Trocando frame
time.sleep(1)
driver.switch_to.frame(0)
driver.switch_to.frame(0)
# Selecionando empresa
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*\[#id="lay"\]/div\[2\]/div\[2\]/div\[7\]/div\[2\]/div/div/table/tbody/tr/td'))).click()
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*\[#id="lookupInput"\]/option\[2\]'))).click()
#Marcando serviços prestados
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*\[#id="HTMLGroupBox628126"\]/table/tbody/tr\[2\]/td/table/tbody/tr/td\[1\]/a/img'))).click()
#Marcando retenção de ISS
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*\[#id="HTMLGroupBox628123"\]/table/tbody/tr\[1\]/td/table/tbody/tr/td\[1\]/a/img'))).click()
# selecionando a data inicial
driver.find_element_by_name('WFRInput628128').send_keys(datainicial)
# selecionando a data final
driver.find_element_by_name('WFRInput628127').send_keys(datafinal)
#Consultar
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*\[#id="lay"\]/div\[2\]/div\[2\]/div\[21\]/div/table/tbody/tr/td'))).click()
#Imprimir relatório
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*\[#id="lay"\]/div\[2\]/div\[3\]/div\[5\]/div/table/tbody/tr/td'))).click()
fim = time.time()
duracao = fim - inicio
print('O programa rodou em: {} e foram baixadas {} empresas'.format(duracao, i))
What can I use to get only this data from this PDF file?
I would go easier route:
Download pdf file using any library like requests, then read pdf file text using tika, then search for my word using regex or any other method
from tika import parser
raw = parser.from_file('sample.pdf')
print(raw['content'])

Categories