I was doing some crawling stuff with selenium.
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd
instagram_id="username"
instagram_pw="password"
_id = driver.find_element(By.NAME, 'username')
_id.send_keys(instagram_id)
time.sleep(2)
_password = driver.find_element(By.NAME, 'password')
_password.send_keys(instagram_pw)
time.sleep(2)
login_button = driver.find_element(By.CSS_SELECTOR, '.sqdOP.L3NKy.y3zKF').click()
time.sleep(5) #press login button
_keyword = '교토'
driver.get('https://www.instagram.com/explore/tags/' + _keyword + '/') #instagram serch
driver.find_element(By.CSS_SELECTOR, 'div.v1Nh3.kIKUG._bz0w').click()
time.sleep(5) #open first post
There was no problem so far
But in here, NoSuchElementException Error occurs.
results = []
count = 200
for i in range(count):
data = driver.find_elements(By.CSS_SELECTOR, 'a.xil3i') #save hashtag info
for j in range(len(data)):
results.append(data[j].text.replace("#","")) #remove'#'
if (i+1)%10 == 0:
print('{}번째 게시물 완료'.format(i+1))
driver.find_element(By.CSS_SELECTOR, 'a._65Bje.coreSpriteRightPaginationArrow').click() #다음 게시물로 이동
time.sleep(5)
help me fix that error plz.
Thanks
Related
I am trying to send messages automatically on LinkedIn. But I'm stuck in the phase of closing the chat window. How can I fix it?
It's my first time using Selenium. Please help me.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://linkedin.com")
time.sleep(2)
username = driver.find_element(by=By.XPATH, value="//input[#name='session_key']")
password = driver.find_element(by=By.XPATH, value="//input[#name='session_password']")
username.send_keys("hr#unitedmerchant.com")
password.send_keys("UMSUpos2022")
time.sleep(2)
submit = driver.find_element(by=By.XPATH, value="//button[#type='submit']").click()
time.sleep(2)
import random
n_pages = 3
for n in range(1, n_pages + 1):
driver.get("https://www.linkedin.com/search/results/people/?network=%5B%22F%22%5D&origin=FACETED_SEARCH&page=" + str(n))
time.sleep(2)
all_buttons = driver.find_elements(By.TAG_NAME, 'button')
message_buttons = [btn for btn in all_buttons if btn.text == "Message"]
for i in range(2, 3):
#click on "Message" button
driver.execute_script("arguments[0].click();", message_buttons[i])
time.sleep(2)
#activate main div
main_div = driver.find_element(by=By.XPATH, value="//div[starts-with(#class, 'msg-form__msg-content-container')]")
driver.execute_script("arguments[0].click();", main_div)
#type message
paragraphs = driver.find_elements(By.TAG_NAME, 'p')
all_span = driver.find_elements(By.TAG_NAME, 'span')
all_span = [s for s in all_span if s.get_attribute("aria-hidden") == "true"]
idx = [*range(3,23,2)]
greetings = ["Hello", "Hi", "Hey"]
all_names = []
for j in idx:
name = all_span[j].text.split(" ")[0]
all_names.append(name)
greetings_idx = random.randint(0, len(greetings)-1)
message = greetings[greetings_idx] + " " + all_names[i] + ", Sorry, I didn't mean to bother you, I'm just tesing a Linkedin capabilities... My apologies!:) This is not Bluu, this message is automated."
paragraphs[-5].send_keys(message)
time.sleep(2)
#send message
submit = driver.find_element(by=By.XPATH, value="//button[#type='submit']").click()
time.sleep(2)
#close div
close_button = driver.find_elements(By.CLASS_NAME, 'msg-overlay-bubble-header__control')
driver.execute_script("arguments[0].click();", close_button)
time.sleep(2)
<Here's my error message. This section below is my Error message come up.>
There is a problem with closing the chat window. I even cannot find close trigger using finders in selenium.
Please help me.
---------------------------------------------------------------------------
JavascriptException Traceback (most recent call last)
Input In [19], in <cell line: 5>()
46 #close div
47 close_button = driver.find_elements(By.CLASS_NAME, 'msg-overlay-bubble-header__control')
---> 48 driver.execute_script("arguments[0].click();", close_button)
49 time.sleep(2)
JavascriptException: Message: javascript error: arguments[0].click is not a function
(Session info: chrome=105.0.5195.128)
You can use the selector [type="cancel-icon"].artdeco-button__icon. You can either use
for button in driver.find_elements(By.CSS_SELECTOR, '[type="cancel-icon"].artdeco-button__icon'):
button.click()
# can run into errors if the button isn't visible or if another element is blocking it
or:
driver.execute_script("document.querySelectorAll('[type=\"cancel-icon\"].artdeco-button__icon').forEach(e => e.click())")
Which will close all open menus/chat boxes.
I tried XPath using id and it worked.
driver.find_element(By.XPATH, '//button[#id="ember304"]').click()
So I have been using python for twitter scraping using selenium driver. In my code most part of code is working, when I am only scraping a single tweet it's working. But when I am repeating the process for more than one tweet its not working. It is scrolling through the new tweets but not scraping anything. It is not saving anything. I have used this code : https://github.com/israel-dryer/Twitter-Scraper/blob/main/twitter-scraper-tut.ipynb
Please help me to correct the mistake in my code. Here is my code:
import time
import csv
from getpass import getpass
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver import Chrome
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get('https://www.twitter.com/login')
driver.maximize_window()
time.sleep(5)
username = driver.find_element(by=By.XPATH, value='//input[#name="text"]')
username.send_keys('ABC')
username.send_keys(Keys.RETURN)
time.sleep(5)
password = driver.find_element(by=By.XPATH, value='//input[#name="password"]')
password.send_keys('XYZ')
password.send_keys(Keys. RETURN)
time.sleep(5)
search_input = driver.find_element(by=By.XPATH, value='//input[#aria-label="Search query"]')
# '//input[#aria-label-"Search query"]'
search_input.send_keys('#NEET')
search_input.send_keys(Keys.RETURN)
time.sleep(5)
driver.find_element(by=By.LINK_TEXT,value='Latest').click()
time.sleep(5)
def get_tweet_data(card):
find_username=card.find_element(by=By.XPATH, value='.//span').text
try:
find_twitter=card.find_element(by=By.XPATH, value='.//span[contains(text(), "#")]').text
except NoSuchElementException:
return
try:
find_date=card.find_element(by=By.XPATH, value='.//time').get_attribute('datetime')
except NoSuchelementException :
return
find_tweets1=card.find_element(by=By.XPATH, value='.//div[2]/div[2]/div[1]').text
find_tweets2=card.find_element(by=By.XPATH, value='.//div[2]/div[2]/div[2]').text
final_tweet=find_tweets1+find_tweets2
find_retweet=card.find_element(by=By.XPATH, value='.//div[#data-testid="retweet"]').text
find_reply=card.find_element(by=By.XPATH, value='.//div[#data-testid="reply"]').text
find_likes=card.find_element(by=By.XPATH, value='.//div[#data-testid="like').text
tweet=(find_username,find_twitter,find_date,final_tweet,find_retweet,find_reply,find_likes)
return tweet
data = []
tweet_ids = set()
last_position = driver.execute_script("return window.pageYOffset;")
scrolling = True
while scrolling:
page_cards = driver.find_elements(by=By.XPATH, value='//div[#data-testid="tweet"]')
for card in page_cards[-15:]:
tweet = get_tweet_data(card)
if tweet:
tweet_id = ''.join(tweet)
if tweet_id not in tweet_ids:
tweet_ids.add(tweet_id)
data.append(tweet)
scroll_attempt = 0
while True:
# check scroll position
driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
time.sleep(5)
curr_position = driver.execute_script("return window.pageYOffset;")
if last_position == curr_position:
scroll_attempt += 1
# end of scroll region
if scroll_attempt >= 3:
scrolling = False
break
else:
time.sleep(5) # attempt another scroll
else:
last_position = curr_position
break
# close the web driver
driver.close()
with open('scrappeddata1.csv', 'w', newline='', encoding='utf-8') as f:
header = ['find_username','find_twitter','find_date','final_tweet','find_retweet','find_reply','find_likes']
writer = csv.writer(f)
writer.writerow(header)
writer.writerows(data)
Please help to solve the issue
I am trying to automate login to the website https://research.axiscapital.co.in/.
I am able to add username and password. I have also automated solving the captcha. But after it solves the captcha, I am unable to click the login button. I get the WebDriverException: target frame detached exception. I am adding the code below (without the real username and password) for assistance.
NOTE: As soon as the captcha verification expires, the login button becomes clickable again. Kindly help me with it.
import requests
import time
import os
# Added for Selenium
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
# TO MAKE THE SCRAPING FASTER
chrome_options = Options()
chrome_options.add_argument("--disable-extensions")
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=chrome_options)
driver = webdriver.Chrome()
driver.maximize_window()
driver.get("https://research.axiscapital.co.in/")
filename = '1.mp3'
delayTime = 2
googleIBMLink = 'https://speech-to-text-demo.ng.bluemix.net/'
audioToTextDelay = 10
def audioToText(mp3Path):
print("1")
driver.execute_script('''window.open("","_blank");''')
driver.switch_to.window(driver.window_handles[1])
print("2")
driver.get(googleIBMLink)
delayTime = 10
# Upload file
time.sleep(1)
print("3")
# Upload file
time.sleep(1)
root = driver.find_element_by_id('root').find_elements_by_class_name('dropzone _container _container_large')
btn = driver.find_element(By.XPATH, '//*[#id="root"]/div/input')
btn.send_keys('D:\\blogs\\1.mp3')
# Audio to text is processing
time.sleep(delayTime)
#btn.send_keys(path)
print("4")
# Audio to text is processing
time.sleep(audioToTextDelay)
print("5")
text = driver.find_element(By.XPATH, '//*[#id="root"]/div/div[7]/div/div/div').find_elements_by_tag_name('span')
print("5.1")
result = " ".join( [ each.text for each in text ] )
print("6")
driver.close()
driver.switch_to.window(driver.window_handles[0])
print("7")
return result
def saveFile(content,filename):
with open(filename, "wb") as handle:
for data in content.iter_content():
handle.write(data)
wait = WebDriverWait(driver,60)
wait.until(EC.element_to_be_clickable((By.XPATH, '//input[#id="Username"]'))).send_keys(username)
wait.until(EC.element_to_be_clickable((By.XPATH, '//input[#name="Password"]'))).send_keys(password)
time.sleep(1)
googleClass = driver.find_elements_by_class_name('g-recaptcha')[0]
time.sleep(2)
outeriframe = googleClass.find_element_by_tag_name('iframe')
time.sleep(1)
outeriframe.click()
time.sleep(2)
allIframesLen = driver.find_elements_by_tag_name('iframe')
time.sleep(1)
audioBtnFound = False
audioBtnIndex = -1
for index in range(len(allIframesLen)):
driver.switch_to.default_content()
iframe = driver.find_elements_by_tag_name('iframe')[index]
driver.switch_to.frame(iframe)
driver.implicitly_wait(delayTime)
try:
audioBtn = driver.find_element_by_id('recaptcha-audio-button') or driver.find_element_by_id('recaptcha-anchor')
audioBtn.click()
audioBtnFound = True
audioBtnIndex = index
break
except Exception as e:
pass
if audioBtnFound:
try:
while True:
href = driver.find_element_by_id('audio-source').get_attribute('src')
response = requests.get(href, stream=True)
saveFile(response,filename)
response = audioToText(os.getcwd() + '/' + filename)
print(response)
driver.switch_to.default_content()
iframe = driver.find_elements_by_tag_name('iframe')[audioBtnIndex]
driver.switch_to.frame(iframe)
inputbtn = driver.find_element_by_id('audio-response')
inputbtn.send_keys(response)
inputbtn.send_keys(Keys.ENTER)
time.sleep(2)
errorMsg = driver.find_elements_by_class_name('rc-audiochallenge-error-message')[0]
if errorMsg.text == "" or errorMsg.value_of_css_property('display') == 'none':
print("Success")
break
except Exception as e:
print(e)
print('Caught. Need to change proxy now')
else:
print('Button not found. This should not happen.')
time.sleep(4)
wait.until(EC.element_to_be_clickable((By.XPATH, '//button[text()="Login"]'))).click()
You forgot to switch to default context after successfully completing the captcha.
put driver.switch_to.default_content() before break.
Edit: the success block would look like this.
print("Success")
driver.switch_to.default_content()
break
import csv
import time
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from csv import reader
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
chrome_options = Options()
scroll = 5
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
header_added = False
header_added1 = False
url = "url"
driver = webdriver.Chrome(executable_path='C:/chromedriver.exe', options=chrome_options)
driver.maximize_window()
driver.get(url)
time.sleep(3)
search_city = input("Enter the city :")
res_n = input("Enter the Restaurant's name :")
search = driver.find_element_by_xpath('//input[#name="location"]').send_keys(search_city)
time.sleep(2)
driver.find_element_by_xpath('//*[#id="root"]/div[1]/div[1]/div/div[1]/div[1]/div/div[2]/div/div[3]/div[1]/span[2]').click()
time.sleep(3)
driver.find_element_by_xpath('/html/body/div[1]/div[1]/header/div/div/ul/li[5]/div/a/span[1]').click()
time.sleep(1)
search_res = driver.find_element_by_class_name('_2BJMh').send_keys(res_n.lower())
time.sleep(5)
driver.find_element_by_class_name('_2BJMh').send_keys(Keys.RETURN)
time.sleep(5)
try:
driver.find_element_by_class_name('_3FR5S').click()
time.sleep(5)
except:
print("restaurant not open")
driver.quit()
html = driver.find_element_by_tag_name('html')
def get_items():
global header_added
global item_dvs
cats = driver.find_elements_by_class_name('D_TFT')
cats[1].click()
time.sleep(3)
item_dvs = driver.find_elements_by_class_name('_2wg_t')
for div in item_dvs:
name = div.find_element_by_class_name('styles_itemNameText__3bcKX')
print(name.text)
price = div.find_element_by_class_name('rupee')
print(price.text)
if div.find_elements_by_class_name('styles_itemDesc__MTsVd'):
desc = div.find_element_by_class_name('styles_itemDesc__MTsVd').text
else:
desc = None
if div.find_element_by_css_selector('div._1C1Fl._23qjy'):
element = div.find_element_by_css_selector('div._1C1Fl._23qjy')
print("found")
driver.execute_script("arguments[0].scrollIntoView();", element)
add = div.find_element_by_css_selector('._1RPOp')
driver.execute_script("arguments[0].click();", add)
time.sleep(1)
add_ons = driver.find_element_by_class_name('_3UzO2').text
print(add_ons)
driver.find_element_by_css_selector('#modal-placeholder > div:nth-child(3) > div > div._1Kr-y._3EeZR > div > div._1EZLh > div > button').click()
else:
add_ons = None
dict1 = {'Item Name': name.text, "Price": price.text, "Add Ons :": add_ons, "Description": desc}
with open(f'{search_city}_{res_n}.csv', 'a+', encoding='utf-8-sig') as f:
w = csv.DictWriter(f, dict1.keys())
if not header_added:
w.writeheader()
header_added = True
w.writerow(dict1)
get_items()
The is_cust loop keeps running over and over again opening the same element, while the rest of the code moves on to the next divs. What is wrong here?
xPath are bidirectional and is probably the cause here.
Try this code using cssSelector:
for div in item_dvs:
#Do Something
try:
is_cust = div.find_element_by_css_selector('._1C1Fl._23qjy')
print("found")
except NoSuchElementException:
continue
driver.execute_script("arguments[0].scrollIntoView();", is_cust)
add = div.find_element_by_css_selector('._1RPOp')
driver.execute_script("arguments[0].click();", add)
time.sleep(1)
# Not sure why for this one you had driver instead of div. Suspect div should be
add_ons = div.find_element_by_class_name('_26cJ9').text
div.find_element_by_css_selector('#modal-placeholder > div:nth-child(3) > div > div._1Kr-y._3EeZR > div > div._1EZLh > div > button').click()
UPDATE
From your updated code, you are using lot of hardcoded sleep. I will suggest to use the WebDriverWait with expected_conditions.
More info here: Wait from Selenium
Imports needed:
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
Code to be added post driver creation:
wait_time = 5
wait = WebDriverWait(driver, wait_time)
Instead of using sleep like this:
time.sleep(5)
driver.find_element_by_class_name('_2BJMh').send_keys(Keys.RETURN)
time.sleep(5)
Use:
wait.until(EC.presence_of_element_located((By.CLASS_NAME, '_2BJMh'))).send_keys(res_n.lower())
Don't gather the element twice.. use find_elements_by* then validate the length:
descs = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'styles_itemDesc__MTsVd')))
if len(descs) > 0:
desc = descs[0].text
else:
desc = None
from selenium import webdriver
import time
driver = webdriver.Chrome()
def login(usr,pss):
driver.get("https://www.instagram.com/")
time.sleep(2)
usr_nm = driver.find_element_by_xpath('//*[#id="loginForm"]/div/div[1]/div/label/input')
usr_nm.clear()
usr_nm.send_keys(usr)
time.sleep(2)
pss_nm = driver.find_element_by_xpath('//*[#id="loginForm"]/div/div[2]/div/label/input')
pss_nm.clear()
pss_nm.send_keys(pss)
time.sleep(2)
login = driver.find_element_by_xpath('//*[#id="loginForm"]/div/div[3]')
login.click()
time.sleep(2)
saveinfo = driver.find_element_by_xpath('//*[#id="react-root"]/section/main/div/div/div/section/div/button')
saveinfo.click()
time.sleep(2)
noti = driver.find_element_by_xpath('/html/body/div[4]/div/div/div/div[3]/button[2]')
noti.click()
time.sleep(2)
def like(hashtag):
driver.get(f"https://www.instagram.com/explore/tags/{hashtag}/")
time.sleep(2)
# for scroll down
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
href_found = driver.find_elements_by_tag_name("a")
pic_href = [ele.get_attribute('href') for ele in href_found if '.com/p' in ele.get_attribute('href')]
for ele in pic_href:
driver.get(ele)
After this the code is not working, I tried placing it in the loop but it stops working and throws an error that 'list doesn't have attribute click( )'
like = driver.find_elements_by_xpath('/html/body/div[4]/div[2]/div/article/div[3]/section[1]/span[1]/button/div/span')
like.click()
time.sleep(2)
login(<your username >,<your password>)
like('hacking')