url_main = "https://www.youtube.com/feed/history"
I want to keep scrolling down the page above until all of its content is visible, so I tried:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
driver = webdriver.Chrome(driverpath)
wait = WebDriverWait(driver, 10)
driver.get(url_main)
login = driver.find_element_by_css_selector('a[href="https://accounts.google.com/ServiceLogin?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Dko%26next%3Dhttps%253A%252F%252Fwww.youtube.com%252Ffeed%252Fhistory&hl=ko&ec=65620"]')
login.click()
login = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR,'input[type="email"]')))
login.send_keys(email)
login.send_keys(Keys.RETURN)
login = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR,'input[type="password"]')))
login.send_keys(password)
login.send_keys(Keys.RETURN)
content = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR,'div[id="primary"] div[id="contents"]')))
no_of_pagedowns = 1
while no_of_pagedowns:
    content.send_keys(Keys.PAGE_DOWN)
    time.sleep(1)
    no_of_pagedowns -= 1
But it isn't working. I also tried copy-pasting the code from this link, but that didn't work either.
You can implement something like this:
# save the max scrollHeight of the page
scrollHeight = d.execute_script("return window.scrollMaxY")
scrolled_pages = 0
# while we have not reached the max scrollHeight
while d.execute_script("return window.pageYOffset") < scrollHeight:
    # scroll one page down
    d.execute_script("window.scrollByPages(1)")
    scrolled_pages += 1
    # wait for any lazy-loaded images (may not be needed, depending on your use case)
    time.sleep(0.2)
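Note that window.scrollMaxY and window.scrollByPages are Firefox-only APIs, while the question uses webdriver.Chrome. A rough, browser-agnostic sketch of the same idea (not tied to YouTube's markup, and assuming the page scrolls the document itself) is to keep jumping to the bottom until document.documentElement.scrollHeight stops growing:

import time

# assumes `driver` is the already-logged-in webdriver from the question
last_height = driver.execute_script("return document.documentElement.scrollHeight")
while True:
    # jump to the current bottom of the page
    driver.execute_script("window.scrollTo(0, document.documentElement.scrollHeight);")
    # give lazily loaded content a moment to arrive
    time.sleep(1)
    new_height = driver.execute_script("return document.documentElement.scrollHeight")
    if new_height == last_height:
        break  # nothing new was loaded, so we are at the real bottom
    last_height = new_height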
Selenium seems to be looking for data in an old page and not the new one.
I'm trying to automate a search where I select an option from a dropdown menu and fill in a box with a value:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
url = 'http://www.op.nysed.gov/opsearches.htm'
value = '60'
license_no = '084157'
driver = webdriver.Chrome()
driver.get(url)
select = Select(driver.find_element(By.XPATH, '//form[@id="licensee-num-form"]/center/select'))
select.select_by_value(value)
fill = driver.find_element(By.XPATH, '//form[@id="licensee-num-form"]/center/input')
fill.send_keys(license_no)
fill.send_keys(Keys.ENTER)
data = driver.find_element(By.XPATH, "//div[@id='content_column']")
However, when I print data.text, it prints data from the first page, not the second one. I tried using driver.refresh() to refresh the page but it did not work.
This happens because you call data = driver.find_element(By.XPATH, "//div[@id='content_column']") immediately after pressing Enter, while the page has not refreshed yet. You should add a short delay there and then wait for the element's visibility on the refreshed page.
Please try this:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
import time
url = 'http://www.op.nysed.gov/opsearches.htm'
value = '60'
license_no = '084157'
driver = webdriver.Chrome()
wait = WebDriverWait(driver, 20)
driver.get(url)
select = Select(driver.find_element(By.XPATH, '//form[@id="licensee-num-form"]/center/select'))
select.select_by_value(value)
fill = driver.find_element(By.XPATH, '//form[@id="licensee-num-form"]/center/input')
fill.send_keys(license_no)
fill.send_keys(Keys.ENTER)
time.sleep(0.5)
data = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[@id='content_column']")))
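If you would rather not hard-code the time.sleep(0.5), one common alternative (a sketch, assuming the old //div[@id='content_column'] element is actually replaced when the form is submitted) is to grab the element before submitting and wait for it to go stale before locating it again:

old_data = driver.find_element(By.XPATH, "//div[@id='content_column']")
fill.send_keys(Keys.ENTER)
# wait until the element from the previous page has been detached from the DOM
wait.until(EC.staleness_of(old_data))
# then locate it again on the refreshed page
data = wait.until(EC.visibility_of_element_located((By.XPATH, "//div[@id='content_column']")))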
I'm trying to make my program fetch the link of an image and then store it as a string in a variable.
This is the XPath of the image. I need to use XPaths because the XPaths on the website are very similar apart from the "/article[x]" part, which lets me increment the number with a variable and go through all the XPaths on the page.
/html/body/div[2]/div[2]/div[3]/div[2]/div[2]/div[1]/div/article[1]/div[2]/div[1]/a/img
(Screenshot of the website showing the images whose links I'm trying to retrieve.)
My code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import tkinter
import time
Anime = input("Enter Anime:")
driver = webdriver.Chrome(executable_path=r"C:\Users\amete\Documents\chromedriver.exe")
driver.get("https://myanimelist.net/search/all?q=one%20piece&cat=all")
search = driver.find_element_by_xpath('//input[@name="q"]')
wait = WebDriverWait(driver, 20)
wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@name="q"]')))
# Clears the field
search.send_keys(Keys.CONTROL, 'a')
search.send_keys(Keys.DELETE)
# The field is now cleared and the program can type whatever it wants
search.send_keys(Anime)
search.send_keys(Keys.RETURN)
# Accept the cookies
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="qc-cmp2-ui"]/div[2]/div/button[3]'))).click()
# Added this wait
wait.until(EC.element_to_be_clickable((By.XPATH, '//h2[@id="anime"]//ancestor::div[@class="content-left"]//article[1]/div[contains(@class, "list")][1]/div[contains(@class, "information")]/a[1]')))
link = driver.find_element_by_xpath('//h2[@id="anime"]//ancestor::div[@class="content-left"]//article[1]/div[contains(@class, "list")][1]/div[contains(@class, "information")]/a[1]').text
# this is the part I can't work out — the call below is incomplete
piclink = driver.('/html/body/div[2]/div[2]/div[3]/div[2]/div[2]/div[1]/div/article[1]/div[2]/div[1]/a/img')
print(piclink)
You can get it like this (specify the src attribute):
piclink = driver.find_element_by_xpath('/html/body/div[2]/div[2]/div[3]/div[2]/div[2]/div[1]/div/article[1]/div[2]/div[1]/a/img').get_attribute('src')
print(piclink)
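Since the question says the XPaths only differ in the "/article[x]" index, a sketch of looping over all of them could look like this (the stop condition is an assumption — it simply breaks when an index no longer matches an element):

from selenium.common.exceptions import NoSuchElementException

pic_links = []
index = 1
while True:
    xpath = ('/html/body/div[2]/div[2]/div[3]/div[2]/div[2]/div[1]/div'
             '/article[{}]/div[2]/div[1]/a/img').format(index)
    try:
        img = driver.find_element_by_xpath(xpath)
    except NoSuchElementException:
        break  # no more articles on the page
    pic_links.append(img.get_attribute('src'))
    index += 1

print(pic_links)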
I've been at this for hours and haven't made any progress. I'm trying to click on the next button on this page here
Here's my code:
#!/usr/bin/env python3
import sys
import time
import re
import logging
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as options
from bs4 import BeautifulSoup as bs
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
_USE_VIRTUAL_DISPLAY = False
_FORMAT = '%(asctime)s - %(levelname)s - %(name)s - %(message)s'
# logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG)
logging.basicConfig(format=_FORMAT, level=logging.INFO)
_LOGGER = logging.getLogger(sys.argv[0])
_DEFAULT_SLEEP = 0.5
try:
    options = options()
    # options.headless = True
    driver = webdriver.Firefox(options=options, executable_path=r"/usr/local/bin/geckodriver")
    print("Started Browser and Driver")
except:
    _LOGGER.info("Can not run headless mode.")
url = 'https://www.govinfo.gov/app/collection/uscourts/district/alsd/2021/%7B%22pageSize%22%3A%22100%22%2C%22offset%22%3A%220%22%7D'
driver.get(url)
time.sleep(5)
page = driver.page_source
soup = bs(page, "html.parser")
next_page = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="collapseOne1690"]/div/span[1]/div/ul/li[8]/a')))
if next_page:
    print('*****getting next page*****')
    # driver.execute_script('arguments[0].click()', next_page)
    next_page.click()
    time.sleep(3)
else:
    print('no next page')
driver.quit()
I get a timeout error. I've tried changing the XPath. I've tried ActionChains to scroll into view and none have worked. Any help appreciated.
1. Your XPath does not work because it uses the dynamic class name collapseOne1690, as was mentioned earlier. It would not be very stable even if you used only a part of that class name. If you prefer XPaths, I'd suggest this one: //span[@class='custom-paginator']//li[@class='next fw-pagination-btn']/a, or just //li[@class='next fw-pagination-btn']/a. You can also use the CSS selector .next.fw-pagination-btn.
2. I got rid of the logging code because it also has some issues; re-check it.
3. A 5-second explicit wait is too short. Make it at least 10 seconds, better 15. It's just a suggestion.
The smallest reproducible code which clicks the button and uses Firefox is:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options as options
from bs4 import BeautifulSoup as bs
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
options = options()
# options.headless = True
driver = webdriver.Firefox(options=options)
print("Started Browser and Driver")
url = 'https://www.govinfo.gov/app/collection/uscourts/district/alsd/2021/%7B%22pageSize%22%3A%22100%22%2C%22offset%22%3A%220%22%7D'
driver.get(url)
page = driver.page_source
soup = bs(page, "html.parser")
print(soup)
next_page = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable((By.XPATH, "//span[@class='custom-paginator']//li[@class='next fw-pagination-btn']/a")))
next_page.click()
# driver.quit()
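If the goal is to walk through every page rather than click Next once, a sketch of a follow-up loop (same locator as above, with a crude fixed pause between clicks as an assumption) would be:

import time
from selenium.common.exceptions import TimeoutException

while True:
    try:
        next_page = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.XPATH, "//li[@class='next fw-pagination-btn']/a")))
    except TimeoutException:
        break  # no clickable Next button left, assume the last page was reached
    next_page.click()
    time.sleep(2)  # crude wait for the next batch of results to render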
It appears that when I load this page, the div ids are assigned dynamically. The first time I loaded the page the id was collapseOne5168, the second time it was collapseOne1136.
You might consider locating the button by its classes instead; note that find_element_by_class_name won't accept the compound name "next fw-pagination-btn", so a CSS selector is needed.
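A sketch of that idea with a CSS selector (clicking the link inside the li, matching the structure used in the other answer, and reusing the waits already imported above):

# By.CLASS_NAME does not accept compound class names, so use a CSS selector instead
next_btn = WebDriverWait(driver, 15).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, ".next.fw-pagination-btn a")))
next_btn.click()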
I cannot click some buttons in my router's web interface. I was only able to click them using pyautogui, but that approach is not reliable. How can I click this button with Selenium? I will use this code to reset my IP address.
This is the CSS of the element I want to click:
Html Data:
https://mega.nz/file/2XJyEbCR#xBcEtzYh8QFLWTmSfAqll2V-p-SHiaw4wEz1RAWtso0
I tried all of the following, but none of them worked:
try:
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn'))).send_keys("\n")
except:
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn'))).send_keys(Keys.ENTER)
try:
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn')))[0].send_keys("\n")
except:
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn')))[0].send_keys(Keys.ENTER)
try:
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn')))[0].click()
except:
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn'))).click()
Try this:
link = driver.find_element_by_link_text('')
link.click()
If you want to click a link, maybe this example helps you.
I solved it. I should have used frames.
#Selenium
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver import Firefox
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.actions.interaction import KEY
#Beautiful Soup
from bs4 import BeautifulSoup
import lxml.html
import time
import pyautogui
def router_reset():
    print("Resetting the modem")
    driver = webdriver.Chrome('C:/Anaconda3/chromedriver.exe')
    driver.get('http://192.168.1.1/login.cgi')
    username = driver.find_element_by_id('AuthName')
    password = driver.find_element_by_id('AuthPassword')
    login = driver.find_element_by_xpath("//*[@id='login']/fieldset/ul/li[6]/input")
    username.send_keys("admin")
    password.send_keys("turktelekom")
    login.click()
    time.sleep(1)
    # "Skip" button on the screen that opens
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="login"]/fieldset/ul/li[3]/input[2]'))).click()
    source = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="network"]')))
    # target = driver.find_element_by_id("div2")
    target = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="network"]')))
    # Create the object for Action Chains
    actions = ActionChains(driver)
    actions.drag_and_drop(source, target)
    # perform the operation on the element
    actions.click(target)
    actions.perform()
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#network-broadband > a'))).click()
    time.sleep(2)
    # switch into the iframe that contains the edit button
    WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, '//*[@id="mainFrame"]')))
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn'))).click()
    # switch back to the top document before handling the confirmation dialog
    driver.switch_to.default_content()
    time.sleep(5)
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.ui-dialog.ui-widget.ui-widget-content.ui-corner-all > div.ui-dialog-buttonpane.ui-widget-content.ui-helper-clearfix > button:nth-child(2)'))).click()
    time.sleep(60)
    print("Reset sent to the modem; the 60-second wait period has started.")
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="logoutName"]'))).click()
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.ui-dialog.ui-widget.ui-widget-content.ui-corner-all > div.ui-dialog-buttonpane.ui-widget-content.ui-helper-clearfix > button:nth-child(2)'))).click()
router_reset()
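The part that actually fixed it is the frame handling: switching into the mainFrame iframe before locating #editBtn, then switching back to the top document before touching the confirmation dialog. A minimal sketch of just that pattern, using the same locators as in the function above:

# enter the iframe that contains the button
WebDriverWait(driver, 10).until(
    EC.frame_to_be_available_and_switch_to_it((By.XPATH, '//*[@id="mainFrame"]')))
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '#editBtn'))).click()
# leave the iframe again, otherwise elements in the top document cannot be located
driver.switch_to.default_content()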
I've written some code in Python with Selenium to parse product names from a webpage. A few "load more" buttons become visible as the browser scrolls down, and the webpage only displays its full content once the page has been scrolled all the way down and there is no "load more" button left to click. My scraper seems to be doing well, but I'm not getting all the results. There are around 200 products on that page, but I'm only getting 90 of them. What change should I make to my scraper to get them all? Thanks in advance.
The webpage I'm dealing with: Page_Link
This is the script I'm trying with:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("put_above_url_here")
wait = WebDriverWait(driver, 10)
page = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,".listing_item")))
for scroll in range(17):
    page.send_keys(Keys.PAGE_DOWN)
    time.sleep(2)
    try:
        load = driver.find_element_by_css_selector(".lm-btm")
        load.click()
    except Exception:
        pass

for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[id^=item_]"))):
    name = item.find_element_by_css_selector(".pro-name.el2").text
    print(name)

driver.quit()
Try the code below to get the required data:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver = webdriver.Chrome()
driver.get("https://www.purplle.com/search?q=hair%20fall%20shamboo")
wait = WebDriverWait(driver, 10)
header = driver.find_element_by_tag_name("header")
driver.execute_script("arguments[0].style.display='none';", header)
while True:
    try:
        page = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".listing_item")))
        driver.execute_script("arguments[0].scrollIntoView();", page)
        page.send_keys(Keys.END)
        load = wait.until(EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "LOAD MORE")))
        driver.execute_script("arguments[0].scrollIntoView();", load)
        load.click()
        wait.until(EC.staleness_of(load))
    except:
        break

for item in wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[id^=item_]"))):
    name = item.find_element_by_css_selector(".pro-name.el2").text
    print(name)

driver.quit()
You should only use Selenium as a last resort.
A quick look around the webpage shows the API it calls to get your data.
It returns JSON output with all the details:
Link
You can simply loop over the response and store it in a dataframe.
That is much faster and less error-prone than Selenium.
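A sketch of that approach — the endpoint URL and the JSON field names below are placeholders, since the actual API link isn't reproduced here; copy the real one from the browser's network tab:

import requests
import pandas as pd

# hypothetical endpoint taken from the network tab; replace with the real URL
api_url = "https://www.example.com/api/search?q=hair+fall+shampoo&page=1"

response = requests.get(api_url)
response.raise_for_status()
payload = response.json()

# the key that holds the product list depends on the real response; "products" is a guess
products = payload.get("products", [])
df = pd.DataFrame(products)
print(df.head())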