I have been trying to get this Selenium script to comment on videos that match a specific keyword.
I also want the comment to include the channel name; could someone please help me with that if possible? Thanks.
(I know the {}s should not be in there, but they show where the channel name would go.)
Here is the code:
import time
import os
import numpy as np
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

def youtube_login(email, password):
    op = webdriver.ChromeOptions()
    op.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
    #op.add_argument('--headless')
    op.add_argument('--disable-dev-shm-usage')
    op.add_argument('--no-sandbox')
    driver = webdriver.Chrome()  # note: the options built above are not passed here
    driver.get('https://accounts.google.com/ServiceLogin?hl=en&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fhl%3Den%26feature%3Dsign_in_button%26app%3Ddesktop%26action_handle_signin%3Dtrue%26next%3D%252F&uilel=3&passive=true&service=youtube#identifier')
    driver.find_element_by_id('identifierId').send_keys(email)
    driver.find_element_by_id('identifierNext').click()
    time.sleep(3)
    #WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div#password input[name="password"]')))
    driver.find_element_by_css_selector('div#password input[name="password"]').send_keys(password)
    time.sleep(4)
    driver.find_element_by_id('passwordNext').click()
    return driver
def comment_page(driver, urls, comment):
    if len(urls) == 0:
        print('Youtube Comment Bot: Finished!')
        return []
    url = urls.pop()
    driver.get(url)
    print(url)
    driver.implicitly_wait(1)
    if not check_exists_by_xpath(driver, '//*[@id="movie_player"]'):
        return comment_page(driver, urls, random_comment())
    time.sleep(4)
    driver.execute_script("window.scrollTo(0, 600);")
    if not check_exists_by_xpath(driver, '//*[@id="simple-box"]/ytd-comment-simplebox-renderer'):
        return comment_page(driver, urls, random_comment())
    if check_exists_by_xpath(driver, '//*[@id="contents"]/ytd-message-renderer'):
        return comment_page(driver, urls, random_comment())
    WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, "ytd-comments ytd-comment-simplebox-renderer")))
    driver.find_element_by_css_selector("ytd-comments ytd-comment-simplebox-renderer div#placeholder-area").click()
    driver.implicitly_wait(5)
    driver.find_element_by_xpath('//*[@id="contenteditable-root"]').send_keys(comment)
    driver.find_element_by_xpath('//*[@id="contenteditable-root"]').send_keys(Keys.CONTROL, Keys.ENTER)
    post = WebDriverWait(driver, 15).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, 'ytd-comments ytd-comment-simplebox-renderer'))
    )
    post.click()
    r = np.random.randint(2, 5)
    time.sleep(r)
    return comment_page(driver, urls, random_comment())
def random_comment():
    # You can edit these lines=======
    messages = [
        'sup {channel name here}, I loved this video lol cant wait to see more :D'
    ]
    # ===============================
    r = np.random.randint(0, len(messages))
    return messages[r]

def check_exists_by_xpath(driver, xpath):
    try:
        driver.find_element_by_xpath(xpath)
    except NoSuchElementException:
        return False
    return True
if __name__ == '__main__':
    # You should edit these lines=======
    email = 'Youremail@gmail.com'
    password = 'Yourpassword'
    # ==================================
    urls = [
        'https://www.youtube.com/watch?v=Szww2_VqEKs&t',
    ]
    inp = open("url.txt", "r")
    for line in inp.readlines():
        urls.append(line.strip())  # strip the trailing newline from each URL
    driver = youtube_login(email, password)
    comment_page(driver, urls, random_comment())
Here is the locator to get the channel name:
//div[contains(@class, 'channel-name')]//a
All you have to do now is get its text and substitute it into your comment string.
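A minimal sketch of that idea, to drop into comment_page before the comment is typed (it assumes the template in random_comment() is changed to use a '{channel}' placeholder, and that the locator above matches on the watch page):
# fetch the uploader's name from the watch page and substitute it into the comment template
channel_name = driver.find_element_by_xpath("//div[contains(@class, 'channel-name')]//a").text
comment = random_comment().format(channel=channel_name)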
I've been trying to flag/report a list of spam comments on a particular YouTube video.
For that I've been using this Python code, which loads my existing Chrome profile so I am logged in with my account:
import pathlib
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

URL = "https://www.youtube.com/watch?v=dvecqwfU6xw&lc=Ugxw_nsUNUor9AUEBGp4AaABAg.9fDfvkgiqtW9fDkE2r6Blm"
soup = BeautifulSoup(requests.get(URL).content, "html.parser")
options = webdriver.ChromeOptions()
user = pathlib.Path().home()
print(user)
options.add_argument(f"user-data-dir={user}/AppData/Local/Google/Chrome/User Data/")
driver = webdriver.Chrome('chromedriver.exe', chrome_options=options)
driver.get(URL)
wait = WebDriverWait(driver, 100)
comment_box = '//*[@id="comment"]'
reply_box = '//*[@id="replies"]'
while True:
    driver.execute_script("window.scrollBy(0, 200);")
    try:
        reply_box = driver.find_element(By.XPATH, reply_box)
        print(reply_box.text)
        break
    except:
        pass
# resp = driver.request('POST', 'https://www.youtube.com/youtubei/v1/flag/get_form?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false')
# print(resp.text)
button = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="button"]')))
driver.execute_script("arguments[0].click();", button)
The problem comes with opening the menu: I believe that since you have to hover over the three-dot menu before it becomes clickable, I never manage to open the actual menu to report/flag the comment.
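(One common way to deal with a control that only becomes clickable on hover is to hover over the comment first with ActionChains; a minimal sketch of that idea, where the menu-button XPath is only an illustration, not taken from the page:)
from selenium.webdriver.common.action_chains import ActionChains
# hover over the reply so YouTube renders its three-dot menu button as clickable
ActionChains(driver).move_to_element(reply_box).perform()
menu_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="replies"]//ytd-menu-renderer//button')))
menu_button.click()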
My mistake was not taking the full XPath. It works perfectly like this, thanks!
options = webdriver.ChromeOptions()
user = pathlib.Path().home()
print(user)
options.add_argument(f"user-data-dir={user}/AppData/Local/Google/Chrome/User Data/")
options.add_argument('--headless')
driver = webdriver.Chrome('chromedriver.exe', chrome_options=options)
driver.get(URL)
wait = WebDriverWait(driver, 100)
comment_box = '//*[@id="comment"]'
reply_box = '//*[@id="replies"]'
while True:
    driver.execute_script("window.scrollBy(0, 200);")
    try:
        reply_box = driver.find_element(By.XPATH, reply_box)
        print(reply_box.text)
        break
    except:
        pass
option_button = '/html/body/ytd-app/div[1]/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[2]/ytd-comments/ytd-item-section-renderer/div[3]/ytd-comment-thread-renderer[1]/div/ytd-comment-replies-renderer/div[2]/ytd-comment-renderer/div[3]/div[3]/ytd-menu-renderer/yt-icon-button/button'
option_button = wait.until(EC.presence_of_element_located((By.XPATH, option_button)))
driver.execute_script("arguments[0].click();", option_button)
report_button = '/html/body/ytd-app/ytd-popup-container/tp-yt-iron-dropdown/div/ytd-menu-popup-renderer/tp-yt-paper-listbox/ytd-menu-service-item-renderer/tp-yt-paper-item/yt-formatted-string'
report_button = wait.until(EC.presence_of_element_located((By.XPATH, report_button)))
driver.execute_script("arguments[0].click();", report_button)
report_button_spam = '/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog/yt-report-form-modal-renderer/tp-yt-paper-dialog-scrollable/div/div/yt-options-renderer/div/tp-yt-paper-radio-group/tp-yt-paper-radio-button[1]/div[1]'
report_button_spam = wait.until(EC.presence_of_element_located((By.XPATH, report_button_spam)))
driver.execute_script("arguments[0].click();", report_button_spam)
report_button_send = '/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog/yt-report-form-modal-renderer/div/yt-button-renderer[2]/a/tp-yt-paper-button'
report_button_send = wait.until(EC.presence_of_element_located((By.XPATH, report_button_send)))
driver.execute_script("arguments[0].click();", report_button_send)
popup_button_done = '/html/body/ytd-app/ytd-popup-container/tp-yt-paper-dialog[2]/yt-confirm-dialog-renderer/div[2]/div[2]/yt-button-renderer[3]/a/tp-yt-paper-button'
popup_button_done = wait.until(EC.presence_of_element_located((By.XPATH, popup_button_done)))
print(popup_button_done.text)
I am trying to automate login to the website https://research.axiscapital.co.in/.
I am able to enter the username and password, and I have also automated solving the captcha. But after the captcha is solved, I am unable to click the login button: I get a WebDriverException with a "target frame detached" message. I am adding the code below (without the real username and password) for assistance.
NOTE: As soon as the captcha verification expires, the login button becomes clickable again. Kindly help me with it.
import requests
import time
import os
# Added for Selenium
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
# TO MAKE THE SCRAPING FASTER
chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options)
driver = webdriver.Chrome()  # note: this second call replaces the headless driver created just above
driver.maximize_window()
driver.get("https://research.axiscapital.co.in/")
filename = '1.mp3'
delayTime = 2
googleIBMLink = 'https://speech-to-text-demo.ng.bluemix.net/'
audioToTextDelay = 10
def audioToText(mp3Path):
    print("1")
    driver.execute_script('''window.open("","_blank");''')
    driver.switch_to.window(driver.window_handles[1])
    print("2")
    driver.get(googleIBMLink)
    delayTime = 10
    # Upload file
    time.sleep(1)
    print("3")
    # Upload file
    time.sleep(1)
    root = driver.find_element_by_id('root').find_elements_by_class_name('dropzone _container _container_large')
    btn = driver.find_element(By.XPATH, '//*[@id="root"]/div/input')
    btn.send_keys('D:\\blogs\\1.mp3')
    # Audio to text is processing
    time.sleep(delayTime)
    #btn.send_keys(path)
    print("4")
    # Audio to text is processing
    time.sleep(audioToTextDelay)
    print("5")
    text = driver.find_element(By.XPATH, '//*[@id="root"]/div/div[7]/div/div/div').find_elements_by_tag_name('span')
    print("5.1")
    result = " ".join([each.text for each in text])
    print("6")
    driver.close()
    driver.switch_to.window(driver.window_handles[0])
    print("7")
    return result
def saveFile(content, filename):
    with open(filename, "wb") as handle:
        for data in content.iter_content():
            handle.write(data)
wait = WebDriverWait(driver, 60)
wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@id="Username"]'))).send_keys(username)
wait.until(EC.element_to_be_clickable((By.XPATH, '//input[@name="Password"]'))).send_keys(password)
time.sleep(1)
googleClass = driver.find_elements_by_class_name('g-recaptcha')[0]
time.sleep(2)
outeriframe = googleClass.find_element_by_tag_name('iframe')
time.sleep(1)
outeriframe.click()
time.sleep(2)
allIframesLen = driver.find_elements_by_tag_name('iframe')
time.sleep(1)
audioBtnFound = False
audioBtnIndex = -1
for index in range(len(allIframesLen)):
    driver.switch_to.default_content()
    iframe = driver.find_elements_by_tag_name('iframe')[index]
    driver.switch_to.frame(iframe)
    driver.implicitly_wait(delayTime)
    try:
        audioBtn = driver.find_element_by_id('recaptcha-audio-button') or driver.find_element_by_id('recaptcha-anchor')
        audioBtn.click()
        audioBtnFound = True
        audioBtnIndex = index
        break
    except Exception as e:
        pass
if audioBtnFound:
    try:
        while True:
            href = driver.find_element_by_id('audio-source').get_attribute('src')
            response = requests.get(href, stream=True)
            saveFile(response, filename)
            response = audioToText(os.getcwd() + '/' + filename)
            print(response)
            driver.switch_to.default_content()
            iframe = driver.find_elements_by_tag_name('iframe')[audioBtnIndex]
            driver.switch_to.frame(iframe)
            inputbtn = driver.find_element_by_id('audio-response')
            inputbtn.send_keys(response)
            inputbtn.send_keys(Keys.ENTER)
            time.sleep(2)
            errorMsg = driver.find_elements_by_class_name('rc-audiochallenge-error-message')[0]
            if errorMsg.text == "" or errorMsg.value_of_css_property('display') == 'none':
                print("Success")
                break
    except Exception as e:
        print(e)
        print('Caught. Need to change proxy now')
else:
    print('Button not found. This should not happen.')
time.sleep(4)
wait.until(EC.element_to_be_clickable((By.XPATH, '//button[text()="Login"]'))).click()
You forgot to switch back to the default content after successfully completing the captcha.
Put driver.switch_to.default_content() before the break.
Edit: the success block would look like this:
print("Success")
driver.switch_to.default_content()
break
I am trying to log in to this URL, "https://healthunlocked.com/", and then extract the posts from this community URL, "https://healthunlocked.com/positivewellbeing/posts".
I can log in, but then I get an error on the following line:
all_posts = [post for post in driver.find_element_by_class_name("results-posts").find_elements_by_class_name("results-post") if
"results-post" == post.get_attribute("class")]
The error is an AttributeError: 'str' object has no attribute 'find_elements_by_class_name'.
At first I ran the code without logging in and it worked well, but when I changed it to extract posts via a logged-in account, I got the above error. My whole code is below:
import time
from selenium import webdriver
from selenium.common.exceptions import ElementClickInterceptedException

driver_path = r"C:\Program Files (x86)\chromedriver.exe"
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_argument('--headless')
driver = webdriver.Chrome(driver_path, options=options)
driver.implicitly_wait(6)
driver.get("https://healthunlocked.com/")
# click accept cookies
driver.find_element_by_id("ccc-notify-accept").click()
loginpage = driver.find_element_by_id("sitebar-login-button").click()
time.sleep(3)
user_ele = driver.find_element_by_id('email')
user_ele.clear()
user_ele.send_keys('XXXXX')
pass_ele = driver.find_element_by_xpath('//*[@id="password"]')
pass_ele.clear()
pass_ele.send_keys('XXXXX')
# submitting
driver.find_element_by_xpath('/html/body/div[3]/div/div[1]/div/div/section/div[1]/form/button').click()
time.sleep(3)
driver.find_element_by_link_text('Positive Wellbeing During Self-Isolation').click()
time.sleep(3)
driver.find_element_by_link_text('Posts').click()
time.sleep(3)

post_links = set()
while True:
    #driver.get("https://healthunlocked.com/positivewellbeing/posts")
    all_posts = [post for post in
                 driver.find_element_by_class_name("results-posts").find_elements_by_class_name("results-post") if
                 "results-post" == post.get_attribute("class")]
    # handle clicking more posts
    while len(all_posts) <= len(post_links):
        see_more_posts = [btn for btn in driver.find_elements_by_class_name("btn-secondary")
                          if btn.text == "See more posts"]
        try:
            see_more_posts[0].click()
        except ElementClickInterceptedException:
            # handle floating box covering "see more posts" button
            driver.execute_script("return document.getElementsByClassName('floating-box-sign-up')[0].remove();")
            see_more_posts[0].click()
        all_posts = [post for post in driver.find_element_by_class_name("results-posts").find_elements_by_class_name("results-post") if "results-post" == post.get_attribute("class")]
    # populate links
    start_from = len(post_links)
    for post in all_posts[start_from:]:  # len(post_links): <-- to avoid visiting same links
        # save link
        link = post.find_element_by_tag_name("a").get_attribute("href")
        post_links.add(link)
    # visit the site and scrape info
    for post_site in list(post_links)[start_from:]:
        driver.get(post_site)
        #post_text = driver.find_element_by_class_name("post-body").text
        post_text = driver.find_element_by_class_name("post-body")
        for btn in driver.find_element_by_class_name("post-actions__buttons").find_elements_by_tag_name("button"):
            if "Like" in btn.text:
                post_like = btn.text.split()[1][1]
                #print(f"\n{post_text}\nLikes -->{post_like}\n")
        for (post, like) in zip(post_text, post_like):
            print(post.text, like.text)
            print('\n')
On the posts page I would loop, clicking the "See more posts" / "View more posts" buttons, then grab all the data, etc.
wait = WebDriverWait(driver, 10)
driver.get("https://healthunlocked.com/")
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ccc-notify-accept"))).click()
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sitebar-login-button"))).click()
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#email"))).send_keys('a')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#password"))).send_keys('a')
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn--full-width.btn.btn-primary"))).click()
#Go to the page and so forth.
driver.execute_script("window.scrollTo(0,300);")
wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "a.floating-box-sign-up__dismiss.icon-close"))).click()
while True:
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//a[text()='View more posts']|//button[.='See more posts']"))).click()
    except Exception as e:
        print(e)
        break
all_posts = [x.text for x in driver.find_elements_by_xpath("//div[@class='results-posts']/div[@class='results-post']")]
print(all_posts)
I have been struggling with the "Next" page button: the scraper manages to click it and go to the next page, but then it keeps going back to the first page and eventually breaks. I only want to scrape all of the next pages (in this case there is only one, but there might be more in the future).
Any ideas on what might be wrong here? Here is the code:
import scrapy
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

class DatatracSpider(scrapy.Spider):
    name = 'data_trac'
    start_urls = [
        # FOR SALE
        'https://www.milieuproperties.com/search-results.aspx?paramb=ADVANCE%20SEARCH:%20Province%20(Western%20Cape),%20%20Area%20(Cape%20Town)']

    def __init__(self):
        # path to driver
        self.driver = webdriver.Chrome('my_path')

    def parse(self, response):
        self.driver.get(response.url)
        url = self.driver.current_url
        while True:
            try:
                elem = WebDriverWait(self.driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="ContentPlaceHolder1_lvDataPager1"]/a[text()="Next"]')))
                elem.click()
            except TimeoutException:
                break
            WebDriverWait(self.driver, 10).until(lambda driver: self.driver.current_url != url)
            url = self.driver.current_url
            yield scrapy.Request(url=url, callback=self.parse_page, dont_filter=False)

    def parse_page(self, response):
        offering = response.css('span#ContentPlaceHolder1_lblbreadcum::text').get()
        try:
            offering = 'rent' if 'Rental' in offering else 'buy'
        except TypeError:
            offering = 'buy'
        base_link = response.request.url.split('/')
        try:
            base_link = base_link[0] + '//' + base_link[2] + '/'
        except:
            pass
        for p in response.xpath('//div[@class="ct-itemProducts ct-u-marginBottom30 ct-hover"]'):
            link = base_link + p.css('a::attr(href)').get()
            yield scrapy.Request(
                link,
                callback=self.parse_property,
                meta={'item': {
                    'url': link,
                    'offering': offering,
                }},
            )
        # follow to next page

    def parse_property(self, response):
        item = response.meta.get('item')
        . . .
For this particular web site your code won't work, because the URL doesn't change after you click the Next button. Try waiting until the current page number changes (instead of waiting for the URL to change):
def parse(self, response):
    self.driver.get(response.url)
    current_page_number = self.driver.find_element_by_css_selector('#ContentPlaceHolder1_lvDataPager1>span').text
    while True:
        try:
            elem = WebDriverWait(self.driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="ContentPlaceHolder1_lvDataPager1"]/a[text()="Next"]')))
            elem.click()
        except TimeoutException:
            break
        WebDriverWait(self.driver, 10).until(lambda driver: self.driver.find_element_by_css_selector('#ContentPlaceHolder1_lvDataPager1>span').text != current_page_number)
        current_page_number = self.driver.find_element_by_css_selector('#ContentPlaceHolder1_lvDataPager1>span').text
        # the URL stays the same on every page, so dont_filter=True is needed to avoid Scrapy's duplicate filter
        yield scrapy.Request(url=self.driver.current_url, callback=self.parse_page, dont_filter=True)
To repeatedly click the Next button until the last page, do the following.
driver.get('https://www.milieuproperties.com/search-results.aspx?paramb=ADVANCE%20SEARCH:%20Province%20(Western%20Cape),%20%20Area%20(Cape%20Town)')
wait = WebDriverWait(driver, 10)
while True:
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//a[contains(.,'Next') and not(contains(@class,'aspNetDisabled'))]"))).click()
    except:
        break
Please check out this logic; I was able to move to the next pages successfully with it:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome('')  # please provide your driver path here
driver.get('https://www.milieuproperties.com/search-results.aspx?paramb=ADVANCE%20SEARCH:%20Province%20(Western%20Cape),%20%20Area%20(Cape%20Town)')
driver.find_element(By.XPATH, '//*[@id="ContentPlaceHolder1_lvDataPager1"]').click()
I have tried many times to upload a photo to a Facebook post with Selenium. When I read the Selenium documentation, all it says is:
Select the <input type="file"> element and call the send_keys() method passing the file path, either the path relative to the test script, or an absolute path.
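For reference, that documented pattern boils down to something like this (a minimal sketch; the page URL and selector are only illustrative):
from selenium import webdriver

driver = webdriver.Firefox()
driver.get("https://example.com/upload")  # hypothetical page containing an <input type="file">
# locate the file input and send it the path of the photo (an absolute path is safest)
file_input = driver.find_element_by_css_selector("input[type='file']")
file_input.send_keys("/absolute/path/to/photo.jpg")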
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
usr = "random@gmail.com"
pwd = "randompassword"
driver = webdriver.Firefox()
# or you can use Chrome(executable_path="/usr/bin/chromedriver")
driver.get("http://www.facebook.com/login")
assert "Facebook" in driver.title
elem = driver.find_element_by_id("email")
elem.send_keys(usr)
elem = driver.find_element_by_id("pass")
elem.send_keys(pwd)
elem.send_keys(Keys.RETURN)
elem = driver.find_element_by_css_selector("#u_0_y")
elem.send_keys("Hello Internet :) ")
driver.find_element_by_css_selector("._11b").click()
This works for me:
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def main():
    # Your Facebook account user and password
    usr = "test.fb.post@gmail.com"
    pwd = "test123456789"
    # Path of the image to upload and the text of the post (placeholders; edit these)
    ipath = "C:\\path\\to\\image.jpg"
    message = "Hello Internet :)"
    grp = ['https://www.facebook.com/groups/grpid/', 'https://www.facebook.com/groups/grpid/',
           'https://www.facebook.com/groups/grpid/', 'https://www.facebook.com/groups/grpid/',
           'https://www.facebook.com/groups/grpid/', 'https://www.facebook.com/groups/grpid/',
           'https://www.facebook.com/groups/grpid/']
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option("detach", True)
    chrome_options.add_argument("--disable-infobars")
    chrome_options.add_experimental_option("prefs", {
        "profile.default_content_setting_values.notifications": 2  # 1:allow, 2:block
    })
    driver = webdriver.Chrome(chrome_options=chrome_options)
    driver.implicitly_wait(15)  # seconds
    # Go to facebook.com
    driver.get("http://www.facebook.com")
    sleep(2)
    # Enter user email
    elem = driver.find_element_by_id("email")
    elem.send_keys(usr)
    # Enter user password
    elem = driver.find_element_by_id("pass")
    elem.send_keys(pwd)
    # Login
    elem.send_keys(Keys.RETURN)
    sleep(10)
    for group in grp:
        driver.get(group)
        try:
            try:
                commentr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//*[@name='xhpc_message_text']")))
                commentr.click()
            except Exception:
                commentr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//*[@loggingname='status_tab_selector']")))
                commentr.click()
            commentr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//*[@class='_3u15']")))
            commentr.click()
            sleep(3)
            l = driver.find_elements_by_tag_name('input')
            sleep(1)
            for g in l:
                if g == driver.find_element_by_xpath("//input[@type='file'][@class='_n _5f0v']"):
                    sleep(1)
                    g.send_keys(ipath)
                    print('image loaded')
            sleep(10)
            driver.find_element_by_xpath("//*[@class='_1mf _1mj']").send_keys(message)
            sleep(1)
            buttons = driver.find_elements_by_tag_name("button")
            sleep(1)
            for button in buttons:
                if button.text == "Post":
                    sleep(5)
                    button.click()
            sleep(10)
        except Exception:
            print('image not posted in ' + group)
    driver.close()

if __name__ == '__main__':
    main()
Instead of using css_selector, try using XPath:
statuselement = driver.find_element_by_xpath("//*[@name='xhpc_message']").click()
driver.find_element_by_xpath("//*[@class='_3jk']").click()
l = driver.find_elements_by_tag_name('input')
ipath = "C:\\Users\\Utente\\Pictures\\CoutureBeardsley.jpg"
for g in l:
    if g == driver.find_element_by_xpath("//input[@type='file'][@class='_n _5f0v']"):
        g.send_keys(ipath)
        print('image loaded')