How to re-load page while looping over elements? - python

This is my code; it should be easy to reproduce:
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
def main():
# Setup chrome options
chrome_options = Options()
chrome_options.add_argument("--headless") # Ensure GUI is off
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1920x3500")
# Set path to chromedriver as per your configuration
webdriver_service = Service("/home/sumant/chromedriver/stable/chromedriver")
# Choose Chrome Browser
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
driver.maximize_window()
# Get page
url = "https://www.ibrance.com/"
driver.get(url)
time.sleep(2)
ele = driver.find_elements_by_tag_name('a')
for i, e in enumerate(ele):
try:
print(e.get_attribute('outerHTML'))
e.click()
time.sleep(2)
driver.save_screenshot(f"/mnt/d/Work/ss{i}.png")
driver.get(url)
# driver.refresh()
except:
print("element not interactable")
driver.close()
driver.quit()
if __name__ == '__main__':
main()
The idea is that I click on a link, take a screenshot, load the home page again, click on the next link, and so on.
After the first link, it is not able to find any other element on the reloaded page.

This is expected: after the page is reloaded, the elements you collected earlier can no longer be found.
To fix this, the elements need to be located again after each reload.
Do this:
# Count the links once so the loop bound is fixed up front.
link_total = len(driver.find_elements_by_tag_name('a'))
for i in range(link_total):
    # Locate the links afresh on every pass: references obtained before
    # driver.get(url) belong to the previous page load and can no longer
    # be used, so the element must be picked from a freshly found list.
    ele = driver.find_elements_by_tag_name('a')
    if i >= len(ele):
        # The reloaded page exposed fewer links than the first load.
        break
    e = ele[i]
    try:
        print(e.get_attribute('outerHTML'))
        e.click()
        time.sleep(2)
        driver.save_screenshot(f"/mnt/d/Work/ss{i}.png")
    except Exception:
        # Links that cannot be clicked are skipped so the loop carries on.
        print("element not interactable")
    # Go back to the start page so index i + 1 refers to the same link list.
    driver.get(url)
    time.sleep(2)

So this worked
(Thanks YuMa, for the inspiration)
def main():
    """Click every link on the start page in turn and print the title it leads to.

    The driver setup is elided in this snippet (the ``# ...`` line); ``driver``
    is expected to be created there. After each click the start page is loaded
    again and the links are located afresh, because element references from a
    previous page load cannot be reused.
    """
    # ...
    # Get page
    url = "https://www.ibrance.com/"
    driver.get(url)
    time.sleep(2)
    total_clicks = len(driver.find_elements_by_tag_name('a'))

    def get_images(ele, i):
        """Click ``ele[i]``, print the resulting page title, then reload ``url``."""
        try:
            ele[i].click()
            time.sleep(2)
            # driver.save_screenshot(f"/mnt/d/Work/ss{i}.png")
            print(driver.title)
            driver.get(url)
            time.sleep(2)
        except Exception as exc:
            # Report which link was skipped instead of printing an empty line.
            print(f"link {i} skipped: {type(exc).__name__}")

    # Valid indices are 0 .. total_clicks - 1; the previous bound of
    # total_clicks + 1 always produced one out-of-range access.
    for i in range(total_clicks):
        ele = driver.find_elements_by_tag_name('a')
        if i >= len(ele):
            # The reloaded page exposed fewer links than the first load.
            break
        get_images(ele, i)

Related

Selenium webdriver: How to delete/flag spam comments on YouTube platform (without API)

I've been trying to flag/report a list of spam comments in a particular YouTube video.
For that I've been using this code on Python, which loads my previous profile so I log in with my account:
URL = "https://www.youtube.com/watch?
v=dvecqwfU6xw&lc=Ugxw_nsUNUor9AUEBGp4AaABAg.9fDfvkgiqtW9fDkE2r6Blm"
soup = BeautifulSoup(requests.get(URL).content, "html.parser")
options = webdriver.ChromeOptions()
user = pathlib.Path().home()
print(user)
options.add_argument(f"user-data-dir={user}/AppData/Local/Google/Chrome/User Data/")
driver= webdriver.Chrome('chromedriver.exe',chrome_options=options)
driver.get(URL)
wait=WebDriverWait(driver, 100)
comment_box = '//*[#id="comment"]'
reply_box ='//*[#id="replies"]'
while(True):
driver.execute_script("window.scrollBy(0, 200);")
try:
reply_box = driver.find_element(By.XPATH, reply_box)
print(reply_box.text)
break
except:
pass
# resp = driver.request('POST', 'https://www.youtube.com/youtubei/v1/flag/get_form?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false')
# print(resp.text)
button = wait.until(EC.presence_of_element_located((By.XPATH,'//*[#id="button"]')))
driver.execute_script("arguments[0].click();", button)
The problem comes with opening the menu. I believe that, since you have to hover over the three-dots menu before it becomes clickable, I never get to open the actual menu to report/flag the comment.
My mistake was not using the full XPath. It works perfectly like this, thanks:
# Start Chrome with the existing user profile so the session is already logged in.
options = webdriver.ChromeOptions()
user = pathlib.Path().home()
print(user)
options.add_argument(f"user-data-dir={user}/AppData/Local/Google/Chrome/User Data/")
options.add_argument('--headless')
driver = webdriver.Chrome('chromedriver.exe', chrome_options=options)
driver.get(URL)
wait = WebDriverWait(driver, 100)
comment_box = '//*[@id="comment"]'
reply_box_xpath = '//*[@id="replies"]'


def _scroll_and_find_replies(drv):
    """Scroll down 200px and return the replies element; raises while it is absent."""
    drv.execute_script("window.scrollBy(0, 200);")
    return drv.find_element(By.XPATH, reply_box_xpath)


def _js_click(xpath):
    """Wait until the element at ``xpath`` is present, click it via JavaScript, return it."""
    element = wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
    driver.execute_script("arguments[0].click();", element)
    return element


# WebDriverWait retries the callable while it raises NoSuchElementException and
# gives up after the wait's timeout, so a page without replies no longer spins
# forever. The locator string keeps its own name instead of being overwritten.
reply_box = wait.until(_scroll_and_find_replies)
print(reply_box.text)

# Three-dots menu of the reply.
option_button = _js_click('/html/body/ytd-app/div[1]/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[2]/ytd-comments/ytd-item-section-renderer/div[3]/ytd-comment-thread-renderer[1]/div/ytd-comment-replies-renderer/div[2]/ytd-comment-renderer/div[3]/div[3]/ytd-menu-renderer/yt-icon-button/button')
# "Report" entry of the popup menu.
report_button = _js_click('/html/body/ytd-app/ytd-popup-container/tp-yt-iron-dropdown/div/ytd-menu-popup-renderer/tp-yt-paper-listbox/ytd-menu-service-item-renderer/tp-yt-paper-item/yt-formatted-string')
# First radio button of the report form.
report_button_spam = _js_click('/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog/yt-report-form-modal-renderer/tp-yt-paper-dialog-scrollable/div/div/yt-options-renderer/div/tp-yt-paper-radio-group/tp-yt-paper-radio-button[1]/div[1]')
# Submit button of the report form.
report_button_send = _js_click('/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog/yt-report-form-modal-renderer/div/yt-button-renderer[2]/a/tp-yt-paper-button')
# The confirmation dialog button is only waited for and printed, not clicked.
popup_button_done = '/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog[2]/yt-confirm-dialog-renderer/div[2]/div[2]/yt-button-renderer[3]/a/tp-yt-paper-button'
popup_button_done = wait.until(EC.presence_of_element_located((By.XPATH, popup_button_done)))
print(popup_button_done.text)

Python selenium - WebDriverException: target frame detached - for Login button

I am trying to automate login to the website https://research.axiscapital.co.in/.
I am able to add username and password. I have also automated solving the captcha. But after it solves the captcha, I am unable to click the login button. I get the WebDriverException: target frame detached exception. I am adding the code below (without the real username and password) for assistance.
NOTE: As soon as the captcha verification expires, the login button becomes clickable again. Kindly help me with it.
import requests
import time
import os
# Added for Selenium
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
# TO MAKE THE SCRAPING FASTER
chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options)
driver = webdriver.Chrome()
driver.maximize_window()
driver.get("https://research.axiscapital.co.in/")
filename = '1.mp3'
delayTime = 2
googleIBMLink = 'https://speech-to-text-demo.ng.bluemix.net/'
audioToTextDelay = 10
def audioToText(mp3Path):
    """Transcribe the audio file at ``mp3Path`` through the speech-to-text page.

    Opens ``googleIBMLink`` in a new tab, uploads the file through the page's
    file input, waits for processing, joins the text of the result spans,
    closes the tab and switches back to the first window. The numbered prints
    are progress markers.

    Args:
        mp3Path: Path of the audio file to upload.

    Returns:
        The text of all result spans joined with single spaces.
    """
    print("1")
    driver.execute_script('''window.open("","_blank");''')
    driver.switch_to.window(driver.window_handles[1])
    print("2")
    driver.get(googleIBMLink)
    # Local name chosen so the module-level delayTime is not shadowed.
    upload_delay = 10
    # Upload file
    time.sleep(1)
    print("3")
    time.sleep(1)
    btn = driver.find_element(By.XPATH, '//*[@id="root"]/div/input')
    # Upload the file the caller asked for rather than a fixed local path.
    btn.send_keys(mp3Path)
    # Audio to text is processing
    time.sleep(upload_delay)
    print("4")
    time.sleep(audioToTextDelay)
    print("5")
    container = driver.find_element(By.XPATH, '//*[@id="root"]/div/div[7]/div/div/div')
    spans = container.find_elements(By.TAG_NAME, 'span')
    print("5.1")
    result = " ".join(each.text for each in spans)
    print("6")
    # Close the transcription tab and return to the original window.
    driver.close()
    driver.switch_to.window(driver.window_handles[0])
    print("7")
    return result
def saveFile(content, filename, chunk_size=8192):
    """Stream a downloaded response body into a binary file.

    Args:
        content: Object exposing ``iter_content(chunk_size=...)`` that yields
            ``bytes`` chunks (here, the streamed ``requests`` response).
        filename: Path of the file to create or overwrite.
        chunk_size: Number of bytes requested per chunk. Passed explicitly
            because ``iter_content`` otherwise yields one byte at a time,
            which costs one write call per byte.
    """
    with open(filename, "wb") as handle:
        for data in content.iter_content(chunk_size=chunk_size):
            handle.write(data)
wait = WebDriverWait(driver,60)
wait.until(EC.element_to_be_clickable((By.XPATH, '//input[#id="Username"]'))).send_keys(username)
wait.until(EC.element_to_be_clickable((By.XPATH, '//input[#name="Password"]'))).send_keys(password)
time.sleep(1)
googleClass = driver.find_elements_by_class_name('g-recaptcha')[0]
time.sleep(2)
outeriframe = googleClass.find_element_by_tag_name('iframe')
time.sleep(1)
outeriframe.click()
time.sleep(2)
allIframesLen = driver.find_elements_by_tag_name('iframe')
time.sleep(1)
audioBtnFound = False
audioBtnIndex = -1
for index in range(len(allIframesLen)):
driver.switch_to.default_content()
iframe = driver.find_elements_by_tag_name('iframe')[index]
driver.switch_to.frame(iframe)
driver.implicitly_wait(delayTime)
try:
audioBtn = driver.find_element_by_id('recaptcha-audio-button') or driver.find_element_by_id('recaptcha-anchor')
audioBtn.click()
audioBtnFound = True
audioBtnIndex = index
break
except Exception as e:
pass
if audioBtnFound:
try:
while True:
href = driver.find_element_by_id('audio-source').get_attribute('src')
response = requests.get(href, stream=True)
saveFile(response,filename)
response = audioToText(os.getcwd() + '/' + filename)
print(response)
driver.switch_to.default_content()
iframe = driver.find_elements_by_tag_name('iframe')[audioBtnIndex]
driver.switch_to.frame(iframe)
inputbtn = driver.find_element_by_id('audio-response')
inputbtn.send_keys(response)
inputbtn.send_keys(Keys.ENTER)
time.sleep(2)
errorMsg = driver.find_elements_by_class_name('rc-audiochallenge-error-message')[0]
if errorMsg.text == "" or errorMsg.value_of_css_property('display') == 'none':
print("Success")
break
except Exception as e:
print(e)
print('Caught. Need to change proxy now')
else:
print('Button not found. This should not happen.')
time.sleep(4)
wait.until(EC.element_to_be_clickable((By.XPATH, '//button[text()="Login"]'))).click()
You forgot to switch back to the default content after successfully completing the captcha.
Put driver.switch_to.default_content() before the break.
Edit: the success block would look like this:
print("Success")
driver.switch_to.default_content()
break

Scraping Multiple urls selenium

I'm new to coding, but I wrote this code and it scrapes the page fine. Now I want to scrape many of these URLs (around 200). How do I do that?
from selenium import webdriver
chrome_path = r"C:\Users\lenovo\Downloads\chromedriver_win32 (5)\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
driver.get("https://www.kijijiautos.ca/vip/22442312")
driver.find_element_by_xpath('//div[#class="b1yLWE b3zFtQ"]').text
btn = driver.find_element_by_xpath('//button[#class="g1zAe-"]')
btn.click()
driver.find_elements_by_xpath('//span[#class="A2jAym q2jAym"]').text
driver.find_element_by_xpath('//div[#class="b1yLWE b1zAe-"]').text
print(driver.current_url)
Something like below
from selenium import webdriver
chrome_path = r"C:\Users\lenovo\Downloads\chromedriver_win32 (5)\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
def get_scarping(link):
    """Load ``link``, read the listing fields, and return the browser's current URL.

    Args:
        link: URL of the page to open.

    Returns:
        ``driver.current_url`` after the page has been processed.
    """
    driver.get(link)
    first_text = driver.find_element_by_xpath('//div[@class="b1yLWE b3zFtQ"]').text
    btn = driver.find_element_by_xpath('//button[@class="g1zAe-"]')
    btn.click()
    # find_elements_by_xpath returns a list, which has no .text attribute;
    # read the text of each matched element instead.
    span_texts = [
        span.text
        for span in driver.find_elements_by_xpath('//span[@class="A2jAym q2jAym"]')
    ]
    second_text = driver.find_element_by_xpath('//div[@class="b1yLWE b1zAe-"]').text
    # NOTE(review): first_text, span_texts and second_text are read but not
    # returned; the return value is kept as the URL for compatibility with
    # existing callers. Extend it if the scraped text is needed.
    print(driver.current_url)
    return driver.current_url
links = ["https://www.kijijiautos.ca/vip/22442312", "other_urls"]
scrapings = []
for link in links:
scrapings.append(get_scarping(link))
Just add for loop
from selenium import webdriver
chrome_path = r"C:\Users\lenovo\Downloads\chromedriver_win32 (5)\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
# One entry per listing page to scrape; append the remaining URLs here.
# (Looping 200 times over one fixed address only re-scrapes the same page.)
urls = ["https://www.kijijiautos.ca/vip/22442312"]
results = []
for url in urls:
    driver.get(url)
    first_text = driver.find_element_by_xpath('//div[@class="b1yLWE b3zFtQ"]').text
    driver.find_element_by_xpath('//button[@class="g1zAe-"]').click()
    # find_elements_by_xpath returns a list, which has no .text attribute;
    # read the text of each matched element instead.
    span_texts = [
        span.text
        for span in driver.find_elements_by_xpath('//span[@class="A2jAym q2jAym"]')
    ]
    second_text = driver.find_element_by_xpath('//div[@class="b1yLWE b1zAe-"]').text
    # Keep what was read so it is available once the loop has finished.
    results.append((driver.current_url, first_text, span_texts, second_text))
    print(driver.current_url)

Stale Element error after a specific element in a list

Trying to get the tyres' details from this page. https://eurawheels.com/fr/catalogue/BBS
links = driver.find_elements_by_xpath('//div[#class="col-xs-1 col-md-3"]//a')
parent_window = driver.current_window_handle
x = 0
for j in range(len(links)):
driver.execute_script('window.open(arguments[0]);', links[j])
#scraping here
if x == 0:
driver.close()
driver.switch_to.window(parent_window)
x += 1
else:
driver.back()
driver.refresh() #refresh page
tyres = WebDriverWait(driver, 25).until(EC.visibility_of_all_elements_located((By.XPATH, '//div[#class="card-body text-center"]//a'))) #redefine links
time.sleep(4)
It works for 10 links but then the links go stale. Cannot figure out what needs to be changed. Any help is welcome.
You need to scroll each element into view before executing driver.execute_script('window.open(arguments[0]);', links[j]), since not all of the elements are initially loaded on the page.
So your code should look like the following:
from selenium.webdriver.common.action_chains import ActionChains
actions = ActionChains(driver)
links = driver.find_elements_by_xpath('//div[#class="col-xs-1 col-md-3"]//a')
parent_window = driver.current_window_handle
x = 0
for j in range(len(links)):
# Scroll the link into view first; move_to_element needs the element itself, not its index.
actions.move_to_element(links[j]).perform()
driver.execute_script('window.open(arguments[0]);', links[j])
#scraping here
if x == 0:
driver.close()
driver.switch_to.window(parent_window)
x += 1
else:
driver.back()
driver.refresh() #refresh page
tyres = WebDriverWait(driver, 25).until(EC.visibility_of_all_elements_located((By.XPATH, '//div[#class="card-body text-center"]//a'))) #redefine links
time.sleep(4)
Try this:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
link = 'https://eurawheels.com/fr/catalogue/BBS'
with webdriver.Chrome() as driver:
wait = WebDriverWait(driver,15)
driver.get(link)
linklist = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,".card-body > a")))
for i,elem in enumerate(linklist):
linklist[i].click()
wait.until(EC.invisibility_of_element_located((By.CSS_SELECTOR,".spinner-border[role='status']")))
time.sleep(2) #if you kick out this delay, your script will run very fast but you may end up getting same results multiple times.
item = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,"h3"))).text
print(item)
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,"h1.modal-title + button[class='close'][data-dismiss='modal']"))).click()
driver.back()

Python Selenium Upload photo to Facebook

I have tried many times to upload a photo to a Facebook post. When I read the Selenium documentation, all it said was:
Select the <input type="file"> element and call the send_keys() method passing the file path, either the path relative to the test script, or an absolute path.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
usr = "random#gmail.com"
pwd = "randompassword"
driver = webdriver.Firefox()
# or you can use Chrome(executable_path="/usr/bin/chromedriver")
driver.get("http://www.facebook.com/login")
assert "Facebook" in driver.title
elem = driver.find_element_by_id("email")
elem.send_keys(usr)
elem = driver.find_element_by_id("pass")
elem.send_keys(pwd)
elem.send_keys(Keys.RETURN)
elem = driver.find_element_by_css_selector("#u_0_y")
elem.send_keys("Hello Internet :) ")
driver.find_element_by_css_selector("._11b").click()
This works for me..
def main():
# Your Facebook account user and password
usr = "test.fb.post#gmail.com"
pwd = "test123456789"
grp = ['https://www.facebook.com/groups/grpid/', 'https://www.facebook.com/groups/grpid/',
'https://www.facebook.com/groups/grpid/', 'https://www.facebook.com/groups/grpid/',
'https://www.facebook.com/groups/grpid/', 'https://www.facebook.com/groups/grpid/',
'https://www.facebook.com/groups/grpid/']
chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("detach", True)
chrome_options.add_argument("--disable-infobars")
chrome_options.add_experimental_option("prefs", { \
"profile.default_content_setting_values.notifications": 2 # 1:allow, 2:block
})
driver = webdriver.Chrome(chrome_options=chrome_options)
driver.implicitly_wait(15) # seconds
# Go to facebook.com
driver.get("http://www.facebook.com")
sleep(2)
# Enter user email
elem = driver.find_element_by_id("email")
elem.send_keys(usr)
# Enter user password
elem = driver.find_element_by_id("pass")
elem.send_keys(pwd)
# Login
elem.send_keys(Keys.RETURN)
sleep(10)
for group in grp:
driver.get(group)
try:
try:
commentr = WebDriverWait(driver,10).until(EC.element_to_be_clickable( (By.XPATH, "//*[#name='xhpc_message_text']") ))
commentr.click()
except Exception:
commentr = WebDriverWait(driver,10).until(EC.element_to_be_clickable( (By.XPATH, "//*[#loggingname='status_tab_selector']") ))
commentr.click()
commentr = WebDriverWait(driver,10).until(EC.element_to_be_clickable( (By.XPATH, "//*[#class='_3u15']") ))
commentr.click()
sleep(3)
l=driver.find_elements_by_tag_name('input')
sleep(1)
for g in l:
if g==driver.find_element_by_xpath("//input[#type='file'][#class='_n _5f0v']"):
sleep(1)
g.send_keys(ipath)
print('image loaded')
sleep(10)
driver.find_element_by_xpath("//*[#class='_1mf _1mj']").send_keys(message)
sleep(1)
buttons = driver.find_elements_by_tag_name("button")
sleep(1)
for button in buttons:
if button.text == "Post":
sleep(5)
button.click()
sleep(10)
except Exception:
pass
print ('image not posted in '+group)
driver.close()
if __name__ == '__main__':
main()
Instead of using css_selector, try using xpath.
# Click the element named 'xhpc_message', then the element with class '_3jk'.
driver.find_element_by_xpath("//*[@name='xhpc_message']").click()
driver.find_element_by_xpath("//*[@class='_3jk']").click()
# Raw string: in a normal literal "\U" starts a unicode escape and is a SyntaxError.
ipath = r"C:\Users\Utente\Pictures\CoutureBeardsley.jpg"
# Locate the file input directly instead of comparing every <input> against it.
file_input = driver.find_element_by_xpath("//input[@type='file'][@class='_n _5f0v']")
file_input.send_keys(ipath)
print('image loaded')

Categories