Hi, I am trying to write a bot that runs automatically once a day. It logs into my LinkedIn account and opens the chat with a person who sends me posts. Every message contains a post, and the bot needs to respond with my email and write something like "sending you my CV".
def start():
    """Log in to LinkedIn, open Messaging, search for a chat and click it.

    Uses names that are not defined in this block and are expected to exist
    at module level: ``driver``, ``time``, ``By``, ``Keys``, ``MyUsername``,
    ``MyPassword`` and ``name_of_chat``.

    The fixed ``time.sleep`` pauses between steps give the page time to
    render before the next element is looked up.
    """
    driver.get("https://www.linkedin.com")
    time.sleep(3)

    # Fill in the sign-in form.
    username = driver.find_element(By.ID, 'session_key')
    username.send_keys(MyUsername)
    time.sleep(0.7)
    password = driver.find_element(By.ID, "session_password")
    password.send_keys(MyPassword)
    time.sleep(0.3)
    # XPath attribute tests use the @name form.
    button_sign = driver.find_element(By.XPATH, '//*[@type="submit"]')
    button_sign.click()
    time.sleep(3)
    # scroll_down_random()

    # Open the Messaging entry of the global navigation bar.
    button_messaging = driver.find_element(By.XPATH, '//*[@id="global-nav"]/div/nav/ul/li[4]/a')
    button_messaging.click()
    time.sleep(2)

    # Type the chat name into the conversation search box and submit it.
    search_chat = driver.find_element(By.XPATH, '//*[@id="search-conversations"]')
    search_chat.send_keys(name_of_chat)
    search_chat.send_keys(Keys.ENTER)
    time.sleep(2)

    # The locator value is an XPath expression, so it must be paired with
    # By.XPATH (By.ID expects a bare id value).
    # NOTE(review): "ember65" looks like a generated id and may differ
    # between sessions - confirm before relying on it.
    submit_application = driver.find_element(By.XPATH, '//*[@id="ember65"]')
    submit_application.click()
But I am stuck on two things: first, after I search I can't select the first chat; second, I need to run in a loop over the messages that contain posts.
after
# The locator value is an XPath expression, so it is paired with By.XPATH
# and the attribute test is written @id.
submit_application = driver.find_element(By.XPATH, '//*[@id="ember65"]')
submit_application.click()
I need to click the first person after I search. Also, how can I stay always connected if I upload it to the cloud, without having to log in every time?
I think the ID you are searching for is incomplete, I get an ID like
"overlay-conversation-card-ember133"
This might be different for different devices so I believe you should use XPATH here rather than the ID.
Hope this helps. 👍🏻
EDIT:
It seems Selenium's .click() does not always register; use .submit() instead. That does not work in all browsers, though, so in that case use .send_keys(Keys.ENTER).
I have decided to attempt to create a simple web scraper script in python. As a small challenge I decided to create a script which will be able to log me into facebook and fetch the current birthdays displayed in the side. I have managed to write a script which is able to log me into my facebook, however I have no idea how to fetch the birthdays displayed.
This is my script.
from selenium import webdriver
from time import sleep
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
# Log in to Facebook with Selenium, then wait for the user before closing.
usr = 'EMAIL'
pwd = 'PASSWORD'

driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get('https://www.facebook.com/')
    print("Opened facebook")
    sleep(1)

    username_box = driver.find_element_by_id('email')
    username_box.send_keys(usr)
    print("Email Id entered")
    sleep(1)

    password_box = driver.find_element_by_id('pass')
    password_box.send_keys(pwd)
    print("Password entered")

    login_box = driver.find_element_by_id('u_0_b')
    login_box.click()

    # NOTE(review): these messages are printed unconditionally; nothing
    # here verifies the login or fetches any data yet.
    print("Login Sucessfull")
    print("Fetched needed data")
    input('Press anything to quit')
finally:
    # Always close the browser, even when one of the lookups above raises.
    driver.quit()
print("Finished")
This is my first time creating a script of this type. My assumption is that I am supposed to traverse through the children of the "jsc_c_3d" div element until I get to the displayed birthdays. Furthermore the id of this element changes everytime the page is refreshed. Can anyone tell me how this is done or if this is the right way that I should go on about solving this problem?
The div for the birthdays, after inspecting the elements:
<div class="" id="jsc_c_3d">
<div class="j83agx80 cbu4d94t ew0dbk1b irj2b8pg">
<div class="qzhwtbm6 knvmm38d"><span class="oi732d6d ik7dh3pa d2edcug0 qv66sw1b c1et5uql
a8c37x1j muag1w35 enqfppq2 jq4qci2q a3bd9o3v knj5qynh oo9gr5id hzawbc8m" dir="auto">
<strong>Bobi Mitrevski</strong>
and
<strong>Trajce Tusev</strong> have birthdays today.</span></div></div></div>
You are correct that you would need to traverse through the inner elements of jsc_c_3d to extract the birthdays that you want. However this whole automated web-scraping is a problem if the id value is dynamic, such that it changes on each occasion. In this case, text parsers such as bs4 would do the job.
With the bs4 approach you simply have to extract the relevant div tags from the DOM and then you can parse the data to obtain the required contents.
More generally, this problem is solvable using the Facebook-API which could be as simple as
import facebook
# Graph API access token. The real value is omitted here; it is the same
# token used in https://developers.facebook.com/tools/explorer/
token = 'a token'
# Client object built from the token.
graph = facebook.GraphAPI(token)
# Request only the birthday and name fields.
args = {'fields' : 'birthday,name' }
# Fetch the "me/friends" object, passing the field selection as keyword arguments.
friends = graph.get_object("me/friends",**args)
I want to extract all the fantasy teams that have been entered for past contests. To loop through the dates, I just change a small part of the URL as shown in my code below:
#Packages:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as ec
import pandas as pd
# Start Chrome through the local chromedriver executable.
chromedriver = "C:/Users/Michel/Desktop/python/package/chromedriver_win32/chromedriver.exe"
driver = webdriver.Chrome(chromedriver)

# Empty frames that will be used later.
results, best_lineups, opti_lineups = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

# Dates to visit; each one is spliced into the results URL.
calendar = ["2019-01-10", "2019-01-11"]
for d in calendar:
    driver.get("".join(("https://rotogrinders.com/resultsdb/date/", d, "/sport/4/")))
Then, to access the different contests of that day, you need to click on the contest tab. I use the following code to locate and click on it.
# Find the "Contest" tab and click it.
# XPath attribute tests use the @name form.
contest = driver.find_element_by_xpath("//*[@id='root']/div/main/main/div[2]/div[3]/div/div/div[1]/div/div/div/div/div[3]")
contest.click()
I simply inspect and copy the xpath of the tab. However, most of the times it is working, but sometimes I get an error message " Unable to locate element...". Moreover, it seems to work only for the first date in my calendar loop and always fails in the next iteration... I do not know why. I try to locate it differently, but I feel I am missing something such as:
# Locate the first element whose role attribute is "tab".
contests = driver.find_element_by_xpath("//*[@role='tab']")
Once, the contest tab is successfully clicked, all contests of that day are there and you can click on a link to access all the entries of that contest. I stored the contests in order to iterate throuhg all as follow:
# Collect the href of every anchor on the page.
list_links = driver.find_elements_by_tag_name('a')
hlink = [ii.get_attribute("href") for ii in list_links]

# Keep only links that point at a results-database contest page.
# Anchors without an href yield None; they are skipped so the substring
# tests cannot raise TypeError.
sub = "https://rotogrinders.com/resultsdb"
con = "contest"
contest_list = [text for text in hlink if text and sub in text and con in text]

# Iterate through all the entries(user) of a contest and extract the information of the team entered by the user
for c in contest_list:
    driver.get(c)
Then, I want to extract all participants team entered in the contest and store it in a dataframe. I am able to do it successfully for the first page of the contest.
# Scrape each listed user's lineup from the loaded contest page into `results`.
# NOTE(review): this snippet appears with its indentation stripped. Presumably
# the statements below form the body of the `while`, and the per-user
# statements form the body of the `for` - confirm the nesting before running.
# NOTE(review): no `break` is visible, so the `while` seems to end only when
# the 60-second wait times out - confirm.
# Wait (up to 60 seconds) until a table cell contains a link with non-empty text.
while WebDriverWait(driver, 60).until(ec.presence_of_element_located((By.XPATH, './/tbody//tr//td//span//a[text() != ""]'))):
# while ????:
# Parse every HTML table on the page; the first one supplies Rank and User.
tables = pd.read_html(driver.page_source)
users_df = tables[0][['Rank','User']]
# Drop the ' Member' text from the user names.
users_df['User'] = users_df['User'].str.replace(' Member', '')
# Iterate through the users, one table row at a time.
for i, row in users_df.iterrows():
rank = row['Rank']
user = row['User']
# Find the first link whose text equals the user name and click it.
user_link = driver.find_elements(By.XPATH, "//a[text()='%s']" %(user))[0]
user_link.click()
# Re-parse the page after the click; the second table is taken as the lineup.
tables = pd.read_html(driver.page_source)
lineup = tables[1]
#print (user)
#print (lineup)
# Restructure to put into the results dataframe:
# copy row 9's 'Salary' and 'Pts' values into the 'Name' column at labels 9 and 10
# (presumably row 9 holds the lineup totals - confirm).
lineup.loc[9, 'Name'] = lineup.iloc[9]['Salary']
lineup.loc[10, 'Name'] = lineup.iloc[9]['Pts']
# Reshape the 'Name' column into rows of 11 values; the columns are named after
# the first nine 'Pos' entries followed by 'Total_$' and 'Total_Pts'.
temp_df = pd.DataFrame(lineup['Name'].values.reshape(-1, 11),
columns=lineup['Pos'].iloc[:9].tolist() + ['Total_$', 'Total_Pts'] )
# Prepend the Rank and User columns and tag the row with the current date `d`.
temp_df.insert(loc=0, column = 'User', value = user)
temp_df.insert(loc=0, column = 'Rank', value = rank)
temp_df["Date"]=d
results = results.append(temp_df)
# Disabled attempt at clicking the "next page" button:
#next_button = driver.find_elements_by_xpath("//button[@type='button']")
#next_button[2].click()
# Renumber the accumulated rows from 0.
results = results.reset_index(drop=True)
driver.close()
However, there are other pages, and to access them you need to click on the small arrow "next" button at the bottom. Moreover, you can click on that button indefinitely, even if there are no more entries. Therefore, I would like to loop through all pages that have entries, stop when there are no more entries, and then move on to the next contest. I tried to implement a while loop to do so, but my code did not work...
You must really make sure that page loads completely before you do anything on that page.
Moreover, it seems to work only for the first date in my calendar loop
and always fails in the next iteration
Usually when selenium loads a browser page it tries to look for the element even if it is not loaded all the way. I suggest you to recheck the xpath of the element you are trying to click.
Also try to see when the page loads completely and use time.sleep(number of seconds)
to make sure you hit the element or you can check for a particular element or a property of element that would let you know that the page has been loaded.
One more suggestion is that you can use driver.current_url to see which page you are targeting. I had this issue while I was working with multiple tabs, and I had to tell Python/Selenium to switch to that tab manually.
I have created a screen-scraping program using Selenium, which prints out a few variables. I want to take the numbers it prints and compare them to numbers in a text document. I am unsure how to go about this. What would be the best way to do it? The text file will contain 3 numbers, which will be compared to 3 numbers that have been screen scraped.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# Sign in to Acellus, then print the progress entries of several classes.
chrome_path = r"C:\Users\ashabandha\Downloads\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
driver.get("https://signin.acellus.com/SignIn/index.html")
time.sleep(2)

# Fill in and submit the sign-in form.
username = driver.find_element_by_id("Name")
password = driver.find_element_by_id("Psswrd")
username.send_keys("my login")
password.send_keys("my password")
time.sleep(2)
# XPath attribute tests use the @name form.
driver.find_element_by_xpath("""//*[@id="loginform"]/table[2]/tbody/tr/td[2]/input""").click()

# The program has now signed in and navigates to each class's progress tab.
time.sleep(2)
for class_id in (484, 326, 292):
    driver.get("https://admin252.acellus.com/StudentFunctions/progress.html?ClassID={}".format(class_id))
    # Pause after every navigation so the page can render before the lookup.
    time.sleep(2)
    posts = driver.find_elements_by_class_name("Object7069")
    time.sleep(2)
    # Print the log entries of this class.
    for post in posts:
        print(post.text)
Save the Selenium output in a data structure, such as a list or dictionary. Then open the file, extract the values you want to compare against, and apply whatever comparison you need: https://www.python.org/doc/
Check out the section on working with files.
Can anyone tell me how to access the underlying URL to view a given user's Instagram followers? I am able to do this with Instagram API, but given the pending changes to the approval process, I have decided to switch to scraping.
The Instagram web browser allows you to view the follower list for any given public user - for example, to view Instagram's followers, visit "https://www.instagram.com/instagram", and then click on the followers URL to open a window that paginates through viewers (note: you must be logged in to your account to view this).
I note that the URL changes to "https://www.instagram.com/instagram/followers" when this window pops up, but I can't seem to view the underlying page source for this URL.
Since it appears on my browser window, I assume that I will be able to scrape. But do I have to use a package like Selenium? Does anyone know what the underlying URL is, so I don't have to use Selenium?
As an example, I am able to directly access the underlying feed data by visiting "instagram.com/instagram/media/", from which I can scrape and paginate through all iterations. I would like to do something similar with the list of followers, and access this data directly (rather than using Selenium).
EDIT: Dec 2018 Update:
Things have changed in Insta land since this was posted. Here is an updated script that is a bit more pythonic and better utilizes XPATH/CSS paths.
Note that to use this updated script, you must install the explicit package (pip install explicit), or convert each line with waiter to a pure selenium explicit wait.
import itertools
from explicit import waiter, XPATH
from selenium import webdriver
def login(driver):
    """Log in to Instagram through its login page.

    Args:
        driver: Selenium WebDriver used to load the page and locate elements.

    The credentials are the empty placeholders below and must be filled in.
    Element lookups go through ``waiter`` from the ``explicit`` package.
    """
    username = ""  # <username here>
    password = ""  # <password here>

    # Load page
    driver.get("https://www.instagram.com/accounts/login/")

    # Login (XPath attribute tests use the @name form)
    waiter.find_write(driver, "//div/input[@name='username']", username, by=XPATH)
    waiter.find_write(driver, "//div/input[@name='password']", password, by=XPATH)
    waiter.find_element(driver, "//div/button[@type='submit']", by=XPATH).click()

    # Wait for the user dashboard page to load
    waiter.find_element(driver, "//a/span[@aria-label='Find People']", by=XPATH)
def scrape_followers(driver, account):
    """Yield the follower names of *account* one at a time.

    Args:
        driver: Selenium WebDriver, already logged in.
        account: Account name; used for the profile URL and the followers link.

    Yields:
        The text of each follower link in the followers modal.

    This generator has no end condition of its own (the outer loop is
    ``itertools.count``); the caller must stop iterating.
    """
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the 'Follower(s)' link of the requested account; the href is built
    # from `account` so accounts other than "instagram" work too.
    waiter.find_element(driver, "//a[@href='/{0}/followers/']".format(account), by=XPATH).click()

    # Wait for the followers modal to load
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)

    # At this point a Followers modal pops open. If you immediately scroll to the bottom,
    # you hit a stopping point and a "See All Suggestions" link. If you fiddle with the
    # modal by scrolling up and down, you can force it to load additional followers for
    # that person.

    # Now the modal will begin loading followers every time you scroll to the bottom.
    # Keep scrolling in a loop until you've hit the desired number of followers.
    # In this instance, a generator returns followers one-by-one.
    follower_css = "ul div li:nth-child({}) a.notranslate"  # Taking advantage of CSS's nth-child functionality
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            yield waiter.find_element(driver, follower_css.format(follower_index)).text

        # Instagram loads followers 12 at a time. Find the last follower element
        # and scroll it into view, forcing Instagram to load another 12.
        # Even though this element was just found in the loop above, a large
        # amount of time can pass between that call and this one and the
        # element might have gone stale, so it is re-acquired here.
        last_follower = waiter.find_element(driver, follower_css.format(follower_index))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)
if __name__ == "__main__":
    # Script entry point: log in, then list followers of one account.
    account = 'instagram'
    driver = webdriver.Chrome()
    try:
        login(driver)
        print('Followers of the "{}" account'.format(account))
        # Print the first 75 followers; pairing with range(1, 76) stops the
        # loop after the 75th name without asking the generator for more.
        follower_names = scrape_followers(driver, account=account)
        for position, name in zip(range(1, 76), follower_names):
            print("\t{:>3}: {}".format(position, name))
    finally:
        # Close the browser whether or not scraping succeeded.
        driver.quit()
I did a quick benchmark to show how performance decreases exponentially the more followers you attempt to scrape this way:
$ python example.py
Followers of the "instagram" account
Found 100 followers in 11 seconds
Found 200 followers in 19 seconds
Found 300 followers in 29 seconds
Found 400 followers in 47 seconds
Found 500 followers in 71 seconds
Found 600 followers in 106 seconds
Found 700 followers in 157 seconds
Found 800 followers in 213 seconds
Found 900 followers in 284 seconds
Found 1000 followers in 375 seconds
Original post:
Your question is a little confusing. For instance, I'm not really sure what "from which I can scrape and paginate through all iterations" actually means. What are you currently using to scrape and paginate?
Regardless, instagram.com/instagram/media/ is not the same type of endpoint as instagram.com/instagram/followers. The media endpoint appears to be a REST API, configured to return an easily parseable JSON object.
The followers endpoint isn't really a RESTful endpoint from what I can tell. Rather, Instagram AJAXs in the information to the page source (using React?) after you click the Followers button. I don't think you will be able to get that information without using something like Selenium, which can load/render the javascript that displays the followers to the user.
This example code will work:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def login(driver):
    """Log in to Instagram through its login page.

    Args:
        driver: Selenium WebDriver used to load the page and locate elements.

    The credentials are the empty placeholders below and must be filled in.
    """
    username = ""  # <username here>
    password = ""  # <password here>

    # Load page
    driver.get("https://www.instagram.com/accounts/login/")

    # Login (XPath attribute tests use the @name form)
    driver.find_element_by_xpath("//div/input[@name='username']").send_keys(username)
    driver.find_element_by_xpath("//div/input[@name='password']").send_keys(password)
    driver.find_element_by_xpath("//span/button").click()

    # Wait (up to 10 seconds) for a "See All" link, used as the sign that the
    # page after login has loaded.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.LINK_TEXT, "See All")))
def scrape_followers(driver, account):
    """Return the follower names currently shown in *account*'s followers modal.

    Args:
        driver: Selenium WebDriver, already logged in.
        account: Account name used to build the profile URL.

    Returns:
        A list with the text of each follower link found in the modal. No
        scrolling is done, so only the entries loaded at that moment are
        included.
    """
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the 'Follower(s)' link
    driver.find_element_by_partial_link_text("follower").click()

    # Wait for the followers modal to load (XPath attribute tests use @name)
    xpath = "//div[@style='position: relative; z-index: 1;']/div/div[2]/div/div[1]"
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, xpath)))

    # You'll need to figure out some scrolling magic here. Something that can
    # scroll to the bottom of the followers modal, and know when it has reached
    # the bottom. This is pretty impractical for people with a lot of followers.

    # Finally, scrape the followers
    xpath = "//div[@style='position: relative; z-index: 1;']//ul/li/div/div/div/div/a"
    followers_elems = driver.find_elements_by_xpath(xpath)

    return [e.text for e in followers_elems]
if __name__ == "__main__":
    # Script entry point: log in and print the followers of "instagram".
    driver = webdriver.Chrome()
    try:
        login(driver)
        followers = scrape_followers(driver, "instagram")
    finally:
        # Close the browser whether or not scraping succeeded.
        driver.quit()
    else_marker = None  # placeholder removed below
This approach is problematic for a number of reasons, chief among them being how slow it is, relative to the the API.
Update: March 2020
This is just Levi's answer with small updates in some parts because, as it stands now, it did not quit the driver successfully. By default this also gets all the followers; as everyone else has said, it is not intended for accounts with a lot of followers.
import itertools
from explicit import waiter, XPATH
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from time import sleep
def login(driver):
    """Submit the Instagram login form and wait for the page that follows.

    Args:
        driver: Selenium WebDriver used to load the page and locate elements.

    The credentials are the empty placeholders below and must be filled in.
    """
    username = ""  # <username here>
    password = ""  # <password here>

    # Open the login page and give it a fixed three seconds to render.
    driver.get("https://www.instagram.com/accounts/login/")
    sleep(3)

    # Type each credential into the input carrying the matching name.
    for field_name, value in (("username", username), ("password", password)):
        driver.find_element_by_name(field_name).send_keys(value)

    # Submit through the form element itself.
    driver.find_element_by_tag_name('form').submit()

    # Block (up to 15 seconds) until a "See All" link is present.
    dashboard_ready = EC.presence_of_element_located((By.LINK_TEXT, "See All"))
    WebDriverWait(driver, 15).until(dashboard_ready)
def scrape_followers(driver, account):
    """Yield the follower names of *account*, stopping after the last one.

    Args:
        driver: Selenium WebDriver, already logged in.
        account: Account name used to build the profile URL.

    Yields:
        The text of each follower link in the followers modal, up to the
        follower count read from the profile page.

    Raises:
        ValueError: if the follower count on the page is not a plain
            (optionally comma-grouped) integer.
    """
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the 'Follower(s)' link
    sleep(2)
    driver.find_element_by_partial_link_text("follower").click()

    # Wait for the followers modal to load (XPath attribute tests use @name)
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)

    # Total follower count as displayed; thousands separators are removed so
    # a value such as "1,234" parses.
    allfoll = int(driver.find_element_by_xpath("//li[2]/a/span").text.replace(",", ""))

    # At this point a Followers modal pops open. If you immediately scroll to the bottom,
    # you hit a stopping point and a "See All Suggestions" link. If you fiddle with the
    # modal by scrolling up and down, you can force it to load additional followers for
    # that person.

    # Now the modal will begin loading followers every time you scroll to the bottom.
    # Keep scrolling in a loop until the follower count has been reached.
    # In this instance, a generator returns followers one-by-one.
    follower_css = "ul div li:nth-child({}) a.notranslate"  # Taking advantage of CSS's nth-child functionality
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            if follower_index > allfoll:
                # End the generator with a plain return: raising
                # StopIteration inside a generator body is converted to
                # RuntimeError (PEP 479).
                return
            yield waiter.find_element(driver, follower_css.format(follower_index)).text

        # Instagram loads followers 12 at a time. Find the last follower element
        # and scroll it into view, forcing Instagram to load another 12.
        # Even though this element was just found in the loop above, a large
        # amount of time can pass between that call and this one and the
        # element might have gone stale, so it is re-acquired here.
        last_follower = waiter.find_element(driver, follower_css.format(group + 11))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)
if __name__ == "__main__":
    # Script entry point: log in with Firefox and list every follower.
    account = ""  # <account to check>
    driver = webdriver.Firefox(executable_path="./geckodriver")
    try:
        login(driver)
        print('Followers of the "{}" account'.format(account))
        follower_names = scrape_followers(driver, account=account)
        # Number the followers from 1 as they are produced.
        for position, name in enumerate(follower_names, start=1):
            print("\t{:>3}: {}".format(position, name))
    finally:
        # Close the browser whether or not scraping succeeded.
        driver.quit()
I noticed that the previous answer no longer works, so I made an updated version based on the previous answer, which includes the scrolling feature (to get all the users in the list, not just those loaded initially). In addition, this scrapes both followers and following. (You'll need to download chromedriver as well)
import time
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Name of the Instagram account whose followers/following are scraped.
# Empty placeholder; fill in before running.
account = ""
# Location of the Chrome browser executable; replace with your own path.
chrome_binary = r"chrome.exe"
def login(driver):
    """Log in to Instagram through its login page.

    Args:
        driver: Selenium WebDriver used to load the page and locate elements.

    The credentials are the empty placeholders below and must be filled in.
    """
    username = ""  # Your username
    password = ""  # Your password

    # Load page
    driver.get("https://www.instagram.com/accounts/login/")

    # Login (XPath attribute tests use the @name form)
    driver.find_element_by_xpath("//div/input[@name='username']").send_keys(username)
    driver.find_element_by_xpath("//div/input[@name='password']").send_keys(password)
    driver.find_element_by_xpath("//span/button").click()

    # Wait (up to 10 seconds) for a "See All" link, used as the sign that the
    # page after login has loaded.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.LINK_TEXT, "See All")))
def scrape_followers(driver, account):
    """Return the usernames listed in *account*'s followers modal.

    Args:
        driver: Selenium WebDriver, already logged in.
        account: Account name used to build the profile URL.

    Returns:
        A list holding, for every list entry in the modal, the part of its
        text before the first newline.

    The modal is scrolled until its scroll height stops changing, and a
    "FOLLOWERS" banner is printed before returning.
    """
    pause_seconds = 0.5  # Pause to allow loading of content
    modal_xpath = "/html/body/div[4]/div/div/div[2]/div/div[2]"

    # Open the profile and its follower list, then wait for the modal.
    driver.get("https://www.instagram.com/{0}/".format(account))
    driver.find_element_by_partial_link_text("follower").click()
    modal_present = EC.presence_of_element_located((By.XPATH, modal_xpath))
    WebDriverWait(driver, 10).until(modal_present)

    # Keep a page-side reference to the scrollable box and note its height.
    driver.execute_script("followersbox = document.getElementsByClassName('_gs38e')[0];")
    known_height = driver.execute_script("return followersbox.scrollHeight;")

    # Scroll to the bottom repeatedly; stop once the height no longer changes.
    while True:
        driver.execute_script("followersbox.scrollTo(0, followersbox.scrollHeight);")
        time.sleep(pause_seconds)
        current_height = driver.execute_script("return followersbox.scrollHeight;")
        if current_height == known_height:
            break
        known_height = current_height

    # Each entry's text is (username, full name, follow text) on separate
    # lines; keep only the first line.
    entries = driver.find_elements_by_xpath("/html/body/div[4]/div/div/div[2]/div/div[2]/ul/li")
    followers = [entry.text.partition('\n')[0] for entry in entries]

    print("______________________________________")
    print("FOLLOWERS")
    return followers
def scrape_following(driver, account):
    """Return the usernames listed in *account*'s following modal.

    Args:
        driver: Selenium WebDriver, already logged in.
        account: Account name used to build the profile URL.

    Returns:
        A list holding, for every list entry in the modal, the part of its
        text before the first newline.

    The modal is scrolled until its scroll height stops changing, and a
    "FOLLOWING" banner is printed before returning.
    """
    pause_seconds = 0.5  # Pause to allow loading of content
    modal_xpath = "/html/body/div[4]/div/div/div[2]/div/div[2]"

    # Open the profile and its following list, then wait for the modal.
    driver.get("https://www.instagram.com/{0}/".format(account))
    driver.find_element_by_partial_link_text("following").click()
    modal_present = EC.presence_of_element_located((By.XPATH, modal_xpath))
    WebDriverWait(driver, 10).until(modal_present)

    # Keep a page-side reference to the scrollable box and note its height.
    driver.execute_script("followingbox = document.getElementsByClassName('_gs38e')[0];")
    known_height = driver.execute_script("return followingbox.scrollHeight;")

    # Scroll to the bottom repeatedly; stop once the height no longer changes.
    while True:
        driver.execute_script("followingbox.scrollTo(0, followingbox.scrollHeight);")
        time.sleep(pause_seconds)
        current_height = driver.execute_script("return followingbox.scrollHeight;")
        if current_height == known_height:
            break
        known_height = current_height

    # Each entry's text is (username, full name, follow text) on separate
    # lines; keep only the first line.
    entries = driver.find_elements_by_xpath("/html/body/div[4]/div/div/div[2]/div/div[2]/ul/li")
    following = [entry.text.partition('\n')[0] for entry in entries]

    print("\n______________________________________")
    print("FOLLOWING")
    return following
if __name__ == "__main__":
    # Script entry point: start Chrome, log in, print both lists.
    driver_binary = r"chromedriver.exe"
    options = wd.ChromeOptions()
    options.binary_location = chrome_binary  # chrome.exe
    driver = wd.Chrome(driver_binary, chrome_options=options)
    try:
        login(driver)
        # Followers first, then following; each list is printed as soon as
        # it has been scraped.
        followers = scrape_followers(driver, account)
        print(followers)
        following = scrape_following(driver, account)
        print(following)
    finally:
        # Close the browser whether or not scraping succeeded.
        driver.quit()