import os, time, keyword, re, getpass, urllib, requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from urllib import request
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
# Script: open the decision list for a user-supplied date and visit the
# document behind every table row in a new window.
Decision_from = input("Enter the Date From (DD-MM-YYYY):- ")
download_dir = "d:/dirName"

# Firefox profile: plugins and autoplay off, downloads saved without prompts.
profile = webdriver.FirefoxProfile()
profile.set_preference("plugin.state.flash", 0)
profile.set_preference("plugin.state.java", 0)
profile.set_preference("media.autoplay.enabled", False)
profile.set_preference("browser.download.folderList", 2)
# whether or not to show the Downloads window when a download begins.
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.dir", download_dir)
profile.set_preference(
    "browser.helperApps.neverAsk.saveToDisk",
    "application/octet-stream"
    + ",application/zip"
    + ",application/x-rar-compressed"
    + ",application/x-gzip"
    + ",application/msword",
)

driver = webdriver.Firefox(firefox_profile=profile, executable_path=r'd:/geckodriver.exe')
driver.get("https://dsscic.nic.in/cause-list-report-web/view-decision/1")
driver.find_element_by_class_name('form-control').send_keys(Decision_from)
# XPath attribute tests use '@' ('#id' is not valid XPath).
driver.find_element_by_xpath("//*[@id='submit']").click()
driver.find_element_by_xpath("//*[@id='page_length']/option[text()='5000']").click()

rows = len(driver.find_elements_by_xpath("//*[@id='wrapperContent']/div/div/div/section/div/div/div/table/tbody[2]/tr"))
columns = len(driver.find_elements_by_xpath("//*[@id='wrapperContent']/div/div/div/section/div/div/div/table/tbody[2]/tr/td"))

# The three pieces are joined around the row number to address the button in
# the ninth cell of that row: .../tr[<row>]/td[9]/form/button
suni = "//*[@id='wrapperContent']/div/div/div/section/div/div/div/table/tbody[2]/tr["
ashu = "]/td[9"
pansing = "]/form/button"

for t_row in range(1, (rows + 1)):
    if t_row == 1:
        print("Hello Jaaneman First Row Empty")
    else:
        Final_Path = suni + str(t_row) + ashu + pansing
        driver.find_element_by_xpath(Final_Path).click()
        handles = driver.window_handles
        size = len(handles)
        parent_handle = driver.current_window_handle
        # NOTE(review): the original indentation was lost; the statements
        # below are nested under this test so the parent window is never the
        # one that gets closed — confirm against the author's intent.
        if handles[1] != parent_handle:  # if handles[2] != parent_handle:
            driver.switch_to.window(handles[1])  # driver.switch_to.window(handles[2])
            # NOTE: this chain has no trailing .perform(), so the key presses
            # are only queued and never sent to the browser.
            ActionChains(driver).key_down(Keys.ALT).send_keys("F", "A").key_up(Keys.ALT).send_keys(Keys.HOME)
            driver.close()
            driver.switch_to.window(parent_handle)
The whole program runs properly, but the line "ActionChains(driver).key_down(Keys.ALT).send_keys("F", "A").key_up(Keys.ALT).send_keys(Keys.HOME)" does not work. How can I resolve this?
I'm new to Selenium with Python. Please guide me.
You need to add .perform() at the end:
# perform() fires the queued actions in the order they were added.
ActionChains(driver).key_down(Keys.ALT).send_keys("F", "A").key_up(Keys.ALT).send_keys(Keys.HOME).perform()
From the documentation:
When you call methods for actions on the ActionChains object, the actions are stored in a queue in the ActionChains object. When you call perform(), the events are fired in the order they are queued up.
Related
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import chromedriver_autoinstaller
from selenium.webdriver.common.keys import Keys
import subprocess
import time
from selenium.webdriver.common.by import By
import functionmodules
### PATH
# Raw strings keep the Windows backslashes literal (no invalid escape
# sequences such as "\S" or "\D").
CHROME_DRIVER = r'C:\Users\SANGHYUN\Downloads\chromedriver_win32\chromedriver.exe'
url = 'https://cafe.naver.com/reply14/1'
#url = 'https://cafe.naver.com/reply14'
CHROME_PATH = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
searchpath = url

# Start Chrome with remote debugging enabled; the driver attaches to it below
# through the same port. An argument list avoids quoting problems with the
# space in "Program Files".
subprocess.Popen([
    CHROME_PATH,
    '--remote-debugging-port=9222',
    r'--user-data-dir=C:\chrometemp',
])

option = Options()
option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")

# The chromedriver is looked up in a folder named after Chrome's major version.
chrome_ver = chromedriver_autoinstaller.get_chrome_version().split('.')[0]
try:
    driver = webdriver.Chrome(f'./{chrome_ver}/chromedriver.exe', options=option)
except Exception:
    # Driver not usable yet: install a matching one, then retry once.
    chromedriver_autoinstaller.install(True)
    driver = webdriver.Chrome(f'./{chrome_ver}/chromedriver.exe', options=option)
driver.get(searchpath)
def CallGoToArticleStep():
    """Navigate from the cafe's main menu into an article.

    Uses the module-level ``driver``. Clicks the ``menuLink1`` element,
    switches into the ``cafe_main`` frame and sends Enter to the link matched
    by the article XPath. The driver is left inside the ``cafe_main`` frame.
    """
    # go to main
    driver.switch_to.parent_frame()
    driver.find_element(By.XPATH, '//*[@id="menuLink1"]').click()
    driver.switch_to.frame('cafe_main')
    # click article3
    time.sleep(2)
    firstarticle = '//*[@id="main-area"]/div[4]/table/tbody/tr[2]/td[1]/div[3]/div/a[1]'
    element3 = driver.find_element(By.XPATH, firstarticle)
    # Sending a newline to the link activates it.
    element3.send_keys('\n')
#CallGoToArticleStep()
# write reply, send reply
for i in range(1):
    time.sleep(4)
    print(i)
    # Locate the reply text area and type the reply text.
    replyString = '//*[@id="app"]/div/div/div[2]/div[2]/div[4]/div[2]/div[1]/textarea'
    replyElement = driver.find_element(By.XPATH, replyString)
    replyElement.send_keys('whisky life')
    # Click the link that submits the reply.
    replyClickString = '//*[@id="app"]/div/div/div[2]/div[2]/div[4]/div[2]/div[2]/div[2]/a'
    replyClickElement = driver.find_element(By.XPATH, replyClickString)
    replyClickElement.click()
# Keep the script (and the browser session) alive afterwards.
time.sleep(1000)
In this code, if I call CallGoToArticleStep() first, replyElement can be found; if I do not call it, replyElement cannot be found, even though the element shown in the browser is the same.
Is there a way to get replyElement without calling the CallGoToArticleStep function?
Hello, I wrote this code, which returns a list of li elements, but I want to access each a tag inside them and open it. If you have any recommendation, I would be very grateful.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import pandas as pd
import time
# Empty table with the two columns the scraped services will be stored under.
all_services = pd.DataFrame(columns=['Motif', 'Description'])

# Start Chrome through the local chromedriver binary and load the page.
options = Options()
path = "C:/Users/Al4D1N/Documents/ChromeDriver_webscraping/chromedriver.exe"
driver = webdriver.Chrome(executable_path=path, options=options)
driver.get("https://www.mairie.net/national/acte-naissance.htm#plus")

# Every <li> found below an element carrying the "list-images" class.
list_of_services = driver.find_elements_by_css_selector(".list-images li")
I know that I need to iterate over each item in list_of_services, but I don't know how to open each a tag, since none of them has a class or id that would help me tell them apart.
This is one way to extract all of the links within the hrefs.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import pandas as pd
import time
options = Options()
# Empty table with the two columns the scraped services will be stored under.
all_services = pd.DataFrame(columns=['Motif', 'Description'])
path = "C:/Users/Al4D1N/Documents/ChromeDriver_webscraping/chromedriver.exe"
driver = webdriver.Chrome(options=options, executable_path=path)
driver.get("https://www.mairie.net/national/acte-naissance.htm#plus")

# Get all elements in class 'list-images'
list_of_services = driver.find_elements_by_class_name("list-images")
for service in list_of_services:
    # In each element, select the a tags
    atags = service.find_elements_by_css_selector('a')
    for atag in atags:
        # In each a tag, select the href and show it
        href = atag.get_attribute('href')
        print(href)
Output:
https://www.mairie.net/national/acte-mariage.htm#acte-naissance
https://www.mairie.net/national/acte-deces.htm#acte-naissance
https://www.mairie.net/national/carte-identite.htm#acte-naissance
https://www.mairie.net/national/passeport.htm#acte-naissance
https://www.mairie.net/national/casier-judiciaire.htm#acte-naissance
https://www.mairie.net/national/demande-carte-electorale.htm#acte-naissance
https://www.mairie.net/national/cadastre-plu.htm#acte-naissance
https://www.mairie.net/national/carte-grise-en-ligne-par-internet.htm#acte-naissance
https://www.mairie.net/national/certificat-non-gage.htm#acte-naissance
https://www.mairie.net/national/permis-conduire-delivrance.htm#acte-naissance
https://www.mairie.net/national/changement-adresse.htm#acte-naissance
I am not able to print the link of the final PDF that opens after running the code below.
from selenium import webdriver
from selenium.webdriver.support import ui
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
def page_is_loaded(driver):
    """Return True when *driver* can find a ``<body>`` element.

    Takes the driver as its only argument so it can be passed directly to
    ``WebDriverWait.until``.
    """
    return driver.find_element_by_tag_name("body") is not None
def check_exists_by_text(text):
    """Return True if the module-level ``driver`` finds a link with this text.

    ``NoSuchElementException`` from the lookup is translated into False; any
    other exception propagates.
    """
    try:
        driver.find_element_by_link_text(text)
    except NoSuchElementException:
        return False
    return True
driver = webdriver.Chrome("C:/Users/Roshan/Desktop/sbi/chromedriver")
driver.maximize_window()
driver.get("http://www.careratings.com/brief-rationale.aspx")

# Wait (up to 10 s) until the page body is available.
wait = ui.WebDriverWait(driver, 10)
wait.until(page_is_loaded)

# Fill in the date range and submit the search form.
location_field = driver.find_element_by_name("txtfromdate")
location_field.send_keys("2019-05-06")
last_date = driver.find_element_by_name("txttodate")
last_date.send_keys("2019-05-21")
# XPath attribute tests use '@' ('#name' is not valid XPath).
driver.find_element_by_xpath("//input[@name='btn_submit']").click()

if check_exists_by_text('Reliance Capital Limited'):
    elm = driver.find_element_by_link_text('Reliance Capital Limited')
    driver.implicitly_wait(5)
    elm.click()
    driver.implicitly_wait(50)
    #time.sleep(5)
    #driver.quit()
else:
    print("Company is not rated in the given Date range")
The output I expect is the link to this PDF:
"http://www.careratings.com/upload/CompanyFiles/PR/Reliance%20Capital%20Ltd.-05-18-2019.pdf"
but I do not know how to print this link.
You need to find all elements in table, then extract data from them.
from selenium import webdriver
import os
# setup path to chrome driver
chrome_driver = os.getcwd() + '/chromedriver'
# initialise chrome driver
browser = webdriver.Chrome(chrome_driver)
# load url
browser.get('http://www.careratings.com/brief-rationale.aspx')
# setup date range
location_field = browser.find_element_by_name("txtfromdate")
location_field.send_keys("2019-05-06")
last_date = browser.find_element_by_name("txttodate")
last_date.send_keys("2019-05-21")
# XPath attribute tests use '@' ('#name' / '#id' are not valid XPath).
browser.find_element_by_xpath("//input[@name='btn_submit']").click()
# get all data rows
content = browser.find_elements_by_xpath('//*[@id="divManagementSpeak"]/table/tbody/tr/td/a')
# get text and href link from each element
collected_data = []
for item in content:
    url = item.get_attribute("href")
    description = item.get_attribute("innerText")
    collected_data.append((url, description))
# show one (url, description) tuple per line
for entry in collected_data:
    print(entry)
Output:
('http://www.careratings.com/upload/CompanyFiles/PR/Ashwini%20Frozen%20Foods-05-21-2019.pdf', 'Ashwini Frozen Foods')
('http://www.careratings.com/upload/CompanyFiles/PR/Vanita%20Cold%20Storage-05-21-2019.pdf', 'Vanita Cold Storage')
and so on
I would say you just need to put this line:
# Read the target URL from the link element's href attribute.
pdf_link = elm.get_attribute("href")
Just check out the image below. You have missed one important element to click on. When you enter some text in that input box, a dropdown appears below it showing the matching search results to choose from. Once you click on one of those, the rest works as before.
Try the following script:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
url = "http://www.careratings.com/brief-rationale.aspx"

# The context manager quits the browser when the block is left.
with webdriver.Chrome() as driver:
    driver.get(url)
    wait = WebDriverWait(driver, 10)

    # Fill in the date range.
    location_field = wait.until(EC.presence_of_element_located((By.NAME, "txtfromdate")))
    location_field.send_keys("2019-05-06")
    last_date = wait.until(EC.presence_of_element_located((By.NAME, "txttodate")))
    last_date.send_keys("2019-05-21")

    # Type the company name, then pick it from the suggestion dropdown.
    input_search = wait.until(EC.presence_of_element_located((By.NAME, "txtSearchCompany_brief")))
    input_search.send_keys('Reliance Capital Limited')
    time.sleep(3)  #could not get rid of this hardcoded delay to make the script work
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "[onclick*='Reliance Capital Limited']"))).click()
    # time.sleep(2) #activate this line in case the script behaves otherwise
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "input[name='btn_submit']"))).click()

    # Print the href of every result link that ends in ".pdf".
    for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "table tr td > a[href$='.pdf']"))):
        print(item.get_attribute("href"))
Using this link (an image on Flickr), requests only returns the HTML up to the comment:
`<!-- rendered with love by pprd1-node580-lh1.manhattan.bf1.yahoo.com -->`
(see the image below for the HTML).
I would like to access the links within the img elements three div elements below that comment, so I would appreciate any input.
from bs4 import BeautifulSoup
import logging
import os
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import shutil
import sys
import time
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
)


def flickr_images():
    """Search Flickr for a term and fetch the page behind each result.

    Reads exactly two command-line arguments: the search term and the number
    of images wanted. If the argument count is wrong or the number is not an
    integer, prints a usage message and returns None without opening a
    browser.

    Raises:
        requests.HTTPError: if fetching one of the image pages fails.
    """
    try:
        search_term, number_images = sys.argv[1:]
        num_req_images = int(number_images)
    except ValueError:
        # Raised both by the unpacking (wrong argument count) and by int().
        print("Something went wrong. Command line input must be of "
              "format: 'filename searchterm numberimages'")
        return

    # navigate to search results page
    driver = webdriver.Firefox()
    # poll DOM for max 10 secs if element not immediately available
    driver.implicitly_wait(10)
    driver.get("https://www.flickr.com/search/?text=" + search_term)
    driver.maximize_window()
    # 0sec wait = 25images, 1sec = 48, 3+sec = 98
    time.sleep(3)
    image_link_elems = driver.find_elements_by_class_name("overlay")

    # In case requested is > found
    num_images_tosave = min(num_req_images, len(image_link_elems))
    image_elems_tosave = image_link_elems[:num_images_tosave]
    print("{} images found.".format(num_images_tosave))
    logging.info("Length photos: {}".format(len(image_link_elems)))

    # extract image src's from found elements
    src_links = []
    image_links = [link.get_attribute("href") for link in image_elems_tosave]
    for image_link in image_links:
        res = requests.get(image_link)
        # Must be called; the bare attribute reference checked nothing.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")
        src_elem = soup.select(".zoom-small")
HTML image:
I am trying to scrape http://www.gps-coordinates.net/ using Selenium, and I have problems in two areas.
1. Google Autocomplete prevents me from clicking the 'get
coordinates' button. I tried to work around it, but sometimes it still
doesn't work. ( Message: Element is not clickable at point (280, 17.800003051757812). Other element would receive the click )
2. A pop-up indicates that no result is available, but I still fail to handle the pop-up.
`
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
import openpyxl
import xlsxwriter
import pyperclip
# Placeholder until setup() replaces it with a real WebDriver instance.
driver = ""


def setup():
    """Start Firefox, open gps-coordinates.net and store it in ``driver``.

    Raises:
        AssertionError: if the loaded page title does not contain
            'Google Map'.
    """
    global driver
    driver = webdriver.Firefox()
    driver.get("http://www.gps-coordinates.net/")
    assert 'Google Map' in driver.title
def sele(address):
    """Look up *address* on gps-coordinates.net and return its coordinates.

    Calls ``setup()`` first, so every call starts a new browser session.
    Returns ``['Fail', 'Fail']`` from the alert branch, otherwise
    ``[lat, long]`` read back through the clipboard.
    """
    global driver
    setup()
    query = driver.find_element_by_id('address')
    query.clear()
    query.send_keys(address)
    query.send_keys(Keys.RETURN)
    driver.implicitly_wait(0.7)
    query.send_keys(Keys.DOWN)  #To solve in case there is autocomplete by Google
    query.send_keys(Keys.RETURN)
    # XPath attribute tests use '@' ('#id' is not valid XPath).
    button = driver.find_element_by_xpath("//*[@id='wrap']/div[2]/div[4]/div[1]/form[1]/div[2]/div/button")
    button.click()
    driver.implicitly_wait(0.7)
    # NOTE(review): EC.alert_is_present() is only constructed here and never
    # evaluated against the driver, so this test is presumably always true and
    # the else branch never runs — confirm.
    if EC.alert_is_present():  #If there is a popup, that means there is no result for the geocoding
        alert = driver.switch_to_alert()
        alert.accept()
        latlong = ['Fail', 'Fail']
        return latlong
    else:
        latraw = driver.find_element_by_id('latitude')
        longraw = driver.find_element_by_id('longitude')
        # Select all + copy, then read the value back from the clipboard.
        latraw.send_keys(Keys.CONTROL, 'A')
        latraw.send_keys(Keys.CONTROL, 'C')
        lat = pyperclip.paste()
        latraw.clear()
        longraw.send_keys(Keys.CONTROL, 'A')
        longraw.send_keys(Keys.CONTROL, 'C')
        long = pyperclip.paste()
        # NOTE(review): clears the latitude field a second time; possibly
        # longraw.clear() was intended — confirm.
        latraw.clear()
        return [lat, long]
I solved my problem with this code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
import openpyxl
import xlsxwriter
import pyperclip
# Placeholder until setup() replaces it with a real WebDriver instance.
driver = ""


def setup():
    """Start Firefox, open gps-coordinates.net and store it in ``driver``.

    Raises:
        AssertionError: if the loaded page title does not contain
            'Google Map'.
    """
    global driver
    driver = webdriver.Firefox()
    driver.get("http://www.gps-coordinates.net/")
    assert 'Google Map' in driver.title
def sele(address):
    """Look up *address* on the page held by the module-level ``driver``.

    Returns:
        ``['Fail', 'Fail']`` when the site shows an alert after the search
        (treated as "no result"), otherwise ``[lat, long, query]`` read from
        the ``latitude``, ``longitude`` and ``address`` fields.
    """
    global driver
    # Function-scope import: only the "no alert" case should be swallowed.
    from selenium.common.exceptions import NoAlertPresentException

    query = driver.find_element_by_id('address')
    query.clear()
    query.send_keys(address)
    query.send_keys(Keys.RETURN)
    driver.implicitly_wait(0.7)
    query.send_keys(Keys.DOWN)  #To solve in case there is autocomplete by Google
    query.send_keys(Keys.RETURN)
    # XPath attribute tests use '@' ('#id' is not valid XPath).
    button = driver.find_element_by_xpath("//*[@id='wrap']/div[2]/div[4]/div[1]/form[1]/div[2]/div/button")
    button.click()
    driver.implicitly_wait(0.7)

    try:
        alert = driver.switch_to.alert
        alert.accept()
    except NoAlertPresentException:
        # No popup: the result fields hold the geocoding result.
        pass
    else:
        return ['Fail', 'Fail']

    latraw = driver.find_element_by_id('latitude')
    longraw = driver.find_element_by_id('longitude')
    newquery = driver.find_element_by_id('address')
    lat = latraw.get_attribute('value')
    long = longraw.get_attribute('value')
    query = newquery.get_attribute('value')
    return [lat, long, query]
def wrapper(inputad, outputad, k):
    """Geocode one column of an Excel sheet and write the results to a new file.

    Assumes ``setup()`` has already been called, because ``sele`` uses the
    module-level ``driver``.

    Args:
        inputad: path of the input workbook; its 'Sheet1' is read.
        outputad: path of the xlsx file to create.
        k: zero-based index of the cell within each row that holds the address.

    Returns:
        The string "complete" once every row has been processed.
    """
    InFile = openpyxl.load_workbook(inputad)
    Sheet = InFile['Sheet1']
    workbook = xlsxwriter.Workbook(outputad)
    worksheet = workbook.add_worksheet()
    TotalLength = Sheet.max_row
    try:
        for i in range(1, TotalLength + 1):
            addtext = Sheet[i][k].value
            worksheet.write(i, 0, addtext)
            try:
                latlong = sele(addtext)
            except Exception:
                # Best effort: record the failure and carry on with the next row.
                worksheet.write(i, 1, "Failure Inside Iteration")
            else:
                # sele returns two or three values; each gets its own column
                # (1, 2, 3) so nothing is overwritten.
                for col, value in enumerate(latlong, start=1):
                    worksheet.write(i, col, value)
            print("Progress: ", i, " out of ", TotalLength)
    finally:
        # xlsxwriter only writes the file when the workbook is closed.
        workbook.close()
    return "complete"