Python Selenium: retrieve Instagram followers

I'm trying to get the list of followers from my account with Selenium, using code like this (after authenticating with username and password):
driver.get("https://www.instagram.com/{0}/".format(account))

# Click the 'Follower(s)' link
r = driver.find_element_by_partial_link_text("follower").click()
print r

# Wait for the followers modal to load
xpath = "//div[@style='position: relative; z-index: 1;']/div/div[2]/div/div[1]"
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, xpath)))

# You'll need to figure out some scrolling magic here. Something that can
# scroll to the bottom of the followers modal, and know when it's reached
# the bottom. This is pretty impractical for people with a lot of followers

# Finally, scrape the followers
xpath = "//div[@style='position: relative; z-index: 1;']//ul/li/div/div/div/div/a"
followers_elems = driver.find_elements_by_xpath(xpath)
The problem is that the variable 'r' is None. Is there a problem accessing your own followers?

The click() method returns nothing, so r is None.
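If you want both a handle on the element and the click, a minimal sketch (assuming the same page state as above) is to find the element first, keep the reference, and then click it:
# find_element returns the WebElement; click() itself returns None
follower_link = driver.find_element_by_partial_link_text("follower")
follower_link.click()
print(follower_link.text)  # the element reference is still usable for inspection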

Related

Python Selenium: Click Instagram next post button

I'm creating an Instagram bot but cannot figure out how to navigate to the next post. Here is what I tried:
#Attempt 1
next_button = driver.find_element_by_class_name('wpO6b ')
next_button.click()

#Attempt 2
_next = driver.find_element_by_class_name('coreSpriteRightPaginationArrow').click()
Neither of the two worked; I get a NoSuchElementException or an ElementClickInterceptedException. What corrections do I need to make here? This is the button I'm trying to click (to get to the next post).
I checked your class name coreSpriteRightPaginationArrow and I couldn't find any element with that exact class name, but I did see a partial match. So it might help to try XPath's contains(), as shown below:
//div[contains(@class,'coreSpriteRight')]
Here is another XPath, using the class wpO6b. There are 10 elements with that class name, so it is filtered with @aria-label='Next':
//button[@class='wpO6b ']//*[@aria-label='Next']
Try these and let me know if it works.
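As a side note, the original attempts probably failed because find_element_by_class_name accepts a single class name only; 'wpO6b ' contains a trailing space, which makes it a compound selector. A CSS selector avoids that (the aria-label filter below mirrors the XPath above and is an assumption about the current markup):
# Match the button by a single class, then filter by the Next arrow inside it
next_button = driver.find_element_by_css_selector("button.wpO6b [aria-label='Next']")
next_button.click()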
I have tried the code below and it clicks the next button 10 times:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

if __name__ == '__main__':
    driver = webdriver.Chrome('/Users/yosuvaarulanthu/node_modules/chromedriver/lib/chromedriver/chromedriver')  # Optional argument; if not specified, PATH is searched
    driver.maximize_window()
    driver.implicitly_wait(15)
    driver.get("https://www.instagram.com/instagram/")
    time.sleep(2)
    driver.find_element(By.XPATH, "//button[text()='Accept All']").click()
    time.sleep(2)
    #driver.find_element(By.XPATH, "//button[text()='Log in']").click()
    driver.find_element(By.NAME, "username").send_keys('username')
    driver.find_element(By.NAME, "password").send_keys('password')
    driver.find_element(By.XPATH, "//div[text()='Log In']").click()
    driver.find_element(By.XPATH, "//button[text()='Not now']").click()
    driver.find_element(By.XPATH, "//button[text()='Not Now']").click()
    # Open the Instagram page, click the first post, then click the next-post button for the specified range
    driver.get("https://www.instagram.com/instagram/")
    driver.find_element(By.XPATH, "//div[@class='v1Nh3 kIKUG _bz0w']").click()
    for page in range(1, 10):
        driver.find_element(By.XPATH, "//button[@class='wpO6b ']//*[@aria-label='Next']").click()
        time.sleep(2)
    driver.quit()
As you can see, the locator of the next-post right-arrow button changes between the first post and all subsequent posts.
For the first post you should use this locator:
//div[contains(@class,'coreSpriteRight')]
while for all the other posts you should use this locator:
//a[contains(@class,'coreSpriteRight')]
The second element, //a[contains(@class,'coreSpriteRight')], is also present on the first post page; however, it is not clickable there. It is enabled and can be clicked on non-first pages only.
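A minimal sketch of that switch (the locators come from above; wrapping the first-post case in a try/except is my own assumption about how you might structure it):
from selenium.common.exceptions import NoSuchElementException

try:
    # First post only: the arrow is rendered as a div
    driver.find_element_by_xpath("//div[contains(@class,'coreSpriteRight')]").click()
except NoSuchElementException:
    # All subsequent posts: the arrow is an anchor
    driver.find_element_by_xpath("//a[contains(@class,'coreSpriteRight')]").click()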
As you can see in the picture below, the wpO6b button is nested inside a lot of divs. In that case you may need to give Selenium that same chain of divs to reach the button, or give it an XPath. It's not the most optimized, but it should work fine:
driver.find_element(By.XPATH, "(.//*[normalize-space(text()) and normalize-space(.)='© 2022 Instagram from Meta'])[1]/following::*[name()='svg'][2]").click()
Note that the XPath leads to an svg, so we are effectively clicking on the svg element itself, not on the button.
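If you would rather click the button element itself, one option (assuming the svg still carries aria-label='Next', as in the earlier answer) is to walk up to its ancestor button:
# Click the enclosing <button> rather than the <svg> icon inside it
driver.find_element(By.XPATH, "//*[@aria-label='Next']/ancestor::button").click()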

Blocking login overlay window when scraping web page using Selenium

I am trying to scrape a long list of books across 10 web pages. When the loop clicks the next > button for the first time, the website displays a login overlay, so Selenium cannot find the target elements.
I have tried all the possible solutions:
Using some Chrome options.
Using try-except to click the X button on the overlay. But the overlay appears only once (when clicking next > for the first time), and when I put this try-except block at the end of the while True: loop, the loop became infinite, because I use continue in the except block (I do not want to break the loop).
Adding some popup-blocker extensions to Chrome, but they do not work when I run the code, even though I add the extension using options.add_argument('load-extension=' + ExtensionPath).
This is my code:
options = Options()
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
options.add_argument('disable-avfoundation-overlays')
options.add_argument('disable-internal-flash')
options.add_argument('no-proxy-server')
options.add_argument("disable-notifications")
options.add_argument("disable-popup")
Extension = (r'C:\Users\DELL\AppData\Local\Google\Chrome\User Data\Profile 1\Extensions\ifnkdbpmgkdbfklnbfidaackdenlmhgh\1.1.9_0')
options.add_argument('load-extension=' + Extension)
options.add_argument('--disable-overlay-scrollbar')

driver = webdriver.Chrome(options=options)
driver.get('https://www.goodreads.com/list/show/32339._50_?page=')
wait = WebDriverWait(driver, 2)

review_dict = {'title': [], 'author': [], 'rating': []}
html_soup = BeautifulSoup(driver.page_source, 'html.parser')
prod_containers = html_soup.find_all('table', class_='tableList js-dataTooltip')

while True:
    table = driver.find_element_by_xpath('//*[@id="all_votes"]/table')
    for product in table.find_elements_by_xpath(".//tr"):
        for td in product.find_elements_by_xpath('.//td[3]/a'):
            title = td.text
            review_dict['title'].append(title)
        for td in product.find_elements_by_xpath('.//td[3]/span[2]'):
            author = td.text
            review_dict['author'].append(author)
        for td in product.find_elements_by_xpath('.//td[3]/div[1]'):
            rating = td.text[0:4]
            review_dict['rating'].append(rating)
    try:
        close = wait.until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[3]/div/div/div[1]/button')))
        close.click()
    except NoSuchElementException:
        continue
    try:
        element = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'next_page')))
        element.click()
    except TimeoutException:
        break

df = pd.DataFrame.from_dict(review_dict)
df
Any help would be appreciated: whether I can change the while loop to a for loop that clicks the next > button until the end, where I should put the try-except block to close the overlay, or whether there is a Chrome option that can disable the overlay.
Thanks in advance.
Thank you for sharing your code and the website that you are having trouble with. I was able to close the login modal by using XPath. I took this challenge and broke the code up using class objects: one object for the selenium.webdriver.chrome.webdriver and the other for the page you wanted to scrape the data from (https://www.goodreads.com/list/show/32339). In the following methods, I used the JavaScript return arguments[0].scrollIntoView(); call and was able to scroll to the last book displayed on the page. After that, I was able to click the next button:
def scroll_to_element(self, xpath: str):
    element = self.chrome_driver.find_element(By.XPATH, xpath)
    self.chrome_driver.execute_script("return arguments[0].scrollIntoView();", element)

def get_book_count(self):
    return self.chrome_driver.find_elements(By.XPATH, "//div[@id='all_votes']//table[contains(@class, 'tableList')]//tbody//tr").__len__()

def click_next_page(self):
    # Scroll to last record and click "next page"
    xpath = "//div[@id='all_votes']//table[contains(@class, 'tableList')]//tbody//tr[{0}]".format(self.get_book_count())
    self.scroll_to_element(xpath)
    self.chrome_driver.find_element(By.XPATH, "//div[@id='all_votes']//div[@class='pagination']//a[@class='next_page']").click()
Once I clicked the "Next" button, I saw the modal display. I found the XPath for the modal and was able to close it:
def is_displayed(self, xpath: str, timeout: int = 5):
    try:
        web_element = DriverWait(self.chrome_driver, timeout).until(
            DriverConditions.presence_of_element_located(locator=(By.XPATH, xpath))
        )
        return web_element is not None
    except:
        return False

def is_modal_displayed(self):
    return self.is_displayed("//body[@class='modalOpened']")

def close_modal(self):
    self.chrome_driver.find_element(By.XPATH, "//div[@class='modal__content']//div[@class='modal__close']").click()
    if self.is_modal_displayed():
        raise Exception("Modal Failed To Close")
I hope this helps you to solve your problem.
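For completeness, here is a sketch of how these methods might be driven together. The class name GoodreadsPage and the stopping condition are my own assumptions; the method names come from the snippets above:
# Hypothetical driver loop using the page-object methods defined above
page = GoodreadsPage()  # assumed wrapper holding chrome_driver
for _ in range(10):  # the list spans 10 pages
    # ... scrape the rows on the current page here ...
    page.click_next_page()
    if page.is_modal_displayed():
        page.close_modal()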

Python Selenium-webdriver: click on all full review buttons when page loaded

I am trying to retrieve information (including reviews) about an app from the Google Play store. Some of the reviews are short, while others are long and have a Full Review button. When the page loads in the browser, I want to execute a click command that clicks the Full Review button on every review (if any), and then start extracting information from the page. Here is my code:
baseurl = 'https://play.google.com/store/apps/details?id=com.zihua.android.mytracks&hl=en&showAllReviews=true'
driver.get(baseurl)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='d15Mdf bAhLNe']//div[@class='cQj82c']/button[text()='Full Review']"))).click()
person_info = driver.find_elements_by_xpath("//div[@class='d15Mdf bAhLNe']")
for person in person_info:
    review_response_person = ''
    response_date = ''
    response_text = ''
    name = person.find_element_by_xpath(".//span[@class='X43Kjb']").text
    review = person.find_element_by_xpath(".//div[@class='UD7Dzf']/span").text
However, the program throws the following error on the third line of the code (i.e. the WebDriverWait(driver, 10) line):
ElementClickInterceptedException: Message: element click intercepted: Element <button class="LkLjZd ScJHi OzU4dc " jsaction="click:TiglPc" jsname="gxjVle">...</button> is not clickable at point (380, 10). Other element would receive the click: ...
Could anyone guide me on how to fix this issue?
It looks as though the full review text is inside the span just below the visible trimmed review (jsname="fbQN7e"), so you could do something like this:
driver.get("https://play.google.com/store/apps/details?id=com.zihua.android.mytracks&hl=en&showAllReviews=true")
reviews = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, "//div[#jsname='fk8dgd']"))
)
for review in reviews.find_elements(By.CSS_SELECTOR, "div[jscontroller='H6eOGe']"):
reviewText = review.find_element(By.CSS_SELECTOR, "span[jsname='fbQN7e']")
print(reviewText.get_attribute("innerHTML"))
However, that will likely only return one review; you'll need to scroll the page down to the bottom so that they're all loaded in. There are other answers that give good examples of how to do that. Once added, this will iterate over each full review without needing to click a button to expand it.
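As a rough sketch of that scrolling step (the pause length and the use of document.body.scrollHeight are assumptions; the reviews feed may scroll a nested container instead):
import time

last_height = driver.execute_script("return document.body.scrollHeight")
while True:
    # Scroll to the bottom and give the lazy-loaded reviews time to arrive
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break  # no new content loaded; we've reached the end
    last_height = new_height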

Selenium Hover/Click event on ajax filled menu options

Having serious issues here. Someone please help.
I am trying to log in to a website. - This works!
Redirect to the page I want after login. - This works!
Now, once in, I have to hover over the settings icon so the dropdown shows, and then click on the "Settings" option, which has NO id, class, or href.
There are a couple of reasons I can't do this. Number 1: if I try to click on the link after the hover, it tells me that it's hidden and I can't interact with it. Also, the menu options in the form are populated and appended once you hover, through AJAX I think; they are not in the initial page load.
wait = WebDriverWait(driver, 10)
box = wait.until(EC.visibility_of_element_located((By.ID, "yucs-help_button")))
menuButton = driver.find_element_by_id("yucs-help_button")
ActionChains(driver).move_to_element(menuButton).perform()
After the above code runs, I print driver.page_source and can see (below) that the new menu options are there; if you don't hover, the code below will not be on the page.
Now, the <a> I'm trying to click is the <span>Settings</span> option, and for the life of me, it will not work. Either it can't be found, it's not clickable, I can't interact with it, etc. XPath, CSS selector, partial link text: nothing finds this thing. Also, what's weird is that once you click on it from a browser, an id is appended to the <span>. So weird. Any ideas?
<a data-ylk="rspns:nav;t3:tl-lst;t5:custitm;slk:custitm;elm:itm;elmt:custitm;itc:0;cpos:2" class="C(#000)! Td(u):h " data-mad="options" data-customevt="true" href="#" data-rapid_p="18"><span>Settings</span></a>
To perform a mouseover event on the element, you can use .execute_script() with the following JavaScript:
wait = WebDriverWait(driver, 10)
box = wait.until(EC.visibility_of_element_located((By.ID, "yucs-help_button")))
menuButton = driver.find_element_by_id("yucs-help_button")
driver.execute_script("var clickEvent = document.createEvent('MouseEvents');clickEvent.initEvent('mouseover', true, true); arguments[0].dispatchEvent(clickEvent);", menuButton)
Now, after a successful mouseover, try to click on the Settings link as below:
driver.find_element_by_xpath("//span[contains(text(), 'Settings')]/parent::a[@data-mad='options']").click()
Hope it will help you..:)
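If the normal click still reports the element as not interactable, a JavaScript click dispatched directly on the element sometimes gets around Selenium's visibility checks (a sketch, assuming the XPath above finds the link):
settings_link = driver.find_element_by_xpath("//span[contains(text(), 'Settings')]/parent::a[@data-mad='options']")
# Dispatch the click in the page itself, bypassing Selenium's visibility checks
driver.execute_script("arguments[0].click();", settings_link)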

How to web scrape followers from Instagram web browser?

Can anyone tell me how to access the underlying URL to view a given user's Instagram followers? I am able to do this with the Instagram API, but given the pending changes to the approval process, I have decided to switch to scraping.
The Instagram web browser allows you to view the follower list for any given public user - for example, to view Instagram's followers, visit "https://www.instagram.com/instagram", and then click on the followers URL to open a window that paginates through viewers (note: you must be logged in to your account to view this).
I note that the URL changes to "https://www.instagram.com/instagram/followers" when this window pops up, but I can't seem to view the underlying page source for this URL.
Since it appears on my browser window, I assume that I will be able to scrape. But do I have to use a package like Selenium? Does anyone know what the underlying URL is, so I don't have to use Selenium?
As an example, I am able to directly access the underlying feed data by visiting "instagram.com/instagram/media/", from which I can scrape and paginate through all iterations. I would like to do something similar with the list of followers, and access this data directly (rather than using Selenium).
EDIT: Dec 2018 Update:
Things have changed in Insta land since this was posted. Here is an updated script that is a bit more pythonic and better utilizes XPATH/CSS paths.
Note that to use this updated script, you must install the explicit package (pip install explicit), or convert each line that uses waiter into a pure Selenium explicit wait.
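For reference, a single waiter call and its pure-Selenium equivalent look roughly like this (a sketch; the 10-second timeout is an assumption, as explicit's default may differ):
# With the explicit package (as used in the script below):
elem = waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)

# A rough pure-Selenium equivalent:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

elem = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, "//div[@role='dialog']")))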
import itertools

from explicit import waiter, XPATH
from selenium import webdriver

def login(driver):
    username = ""  # <username here>
    password = ""  # <password here>

    # Load page
    driver.get("https://www.instagram.com/accounts/login/")

    # Login
    waiter.find_write(driver, "//div/input[@name='username']", username, by=XPATH)
    waiter.find_write(driver, "//div/input[@name='password']", password, by=XPATH)
    waiter.find_element(driver, "//div/button[@type='submit']", by=XPATH).click()

    # Wait for the user dashboard page to load
    waiter.find_element(driver, "//a/span[@aria-label='Find People']", by=XPATH)

def scrape_followers(driver, account):
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the 'Follower(s)' link
    # driver.find_element_by_partial_link_text("follower").click()
    waiter.find_element(driver, "//a[@href='/instagram/followers/']", by=XPATH).click()

    # Wait for the followers modal to load
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)

    # At this point a Followers modal pops open. If you immediately scroll to the bottom,
    # you hit a stopping point and a "See All Suggestions" link. If you fiddle with the
    # modal by scrolling up and down, you can force it to load additional followers for
    # that person.

    # Now the modal will begin loading followers every time you scroll to the bottom.
    # Keep scrolling in a loop until you've hit the desired number of followers.
    # In this instance, I'm using a generator to return followers one-by-one
    follower_css = "ul div li:nth-child({}) a.notranslate"  # Taking advantage of CSS's nth-child functionality
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            yield waiter.find_element(driver, follower_css.format(follower_index)).text

        # Instagram loads followers 12 at a time. Find the last follower element
        # and scroll it into view, forcing instagram to load another 12
        # Even though we just found this elem in the previous for loop, there can
        # potentially be a large amount of time between that call and this one,
        # and the element might have gone stale. Let's just re-acquire it to avoid
        # that
        last_follower = waiter.find_element(driver, follower_css.format(follower_index))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)

if __name__ == "__main__":
    account = 'instagram'
    driver = webdriver.Chrome()
    try:
        login(driver)

        # Print the first 75 followers for the "instagram" account
        print('Followers of the "{}" account'.format(account))
        for count, follower in enumerate(scrape_followers(driver, account=account), 1):
            print("\t{:>3}: {}".format(count, follower))
            if count >= 75:
                break
    finally:
        driver.quit()
I did a quick benchmark to show how steeply scraping time grows with the number of followers you attempt to scrape this way:
$ python example.py
Followers of the "instagram" account
Found 100 followers in 11 seconds
Found 200 followers in 19 seconds
Found 300 followers in 29 seconds
Found 400 followers in 47 seconds
Found 500 followers in 71 seconds
Found 600 followers in 106 seconds
Found 700 followers in 157 seconds
Found 800 followers in 213 seconds
Found 900 followers in 284 seconds
Found 1000 followers in 375 seconds
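The harness behind those numbers isn't shown; a minimal sketch of one way to produce that output (the 100-follower checkpoints mirror the printout above; everything else is an assumption) could be:
import time

start = time.time()
for count, follower in enumerate(scrape_followers(driver, account="instagram"), 1):
    if count % 100 == 0:
        print("Found {} followers in {:.0f} seconds".format(count, time.time() - start))
    if count >= 1000:
        break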
Original post:
Your question is a little confusing. For instance, I'm not really sure what "from which I can scrape and paginate through all iterations" actually means. What are you currently using to scrape and paginate?
Regardless, instagram.com/instagram/media/ is not the same type of endpoint as instagram.com/instagram/followers. The media endpoint appears to be a REST API, configured to return an easily parseable JSON object.
The followers endpoint isn't really a RESTful endpoint from what I can tell. Rather, Instagram AJAXes the information into the page source (using React?) after you click the Followers button. I don't think you will be able to get that information without using something like Selenium, which can load/render the JavaScript that displays the followers to the user.
This example code will work:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def login(driver):
username = "" # <username here>
password = "" # <password here>
# Load page
driver.get("https://www.instagram.com/accounts/login/")
# Login
driver.find_element_by_xpath("//div/input[#name='username']").send_keys(username)
driver.find_element_by_xpath("//div/input[#name='password']").send_keys(password)
driver.find_element_by_xpath("//span/button").click()
# Wait for the login page to load
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.LINK_TEXT, "See All")))
def scrape_followers(driver, account):
# Load account page
driver.get("https://www.instagram.com/{0}/".format(account))
# Click the 'Follower(s)' link
driver.find_element_by_partial_link_text("follower").click()
# Wait for the followers modal to load
xpath = "//div[#style='position: relative; z-index: 1;']/div/div[2]/div/div[1]"
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, xpath)))
# You'll need to figure out some scrolling magic here. Something that can
# scroll to the bottom of the followers modal, and know when its reached
# the bottom. This is pretty impractical for people with a lot of followers
# Finally, scrape the followers
xpath = "//div[#style='position: relative; z-index: 1;']//ul/li/div/div/div/div/a"
followers_elems = driver.find_elements_by_xpath(xpath)
return [e.text for e in followers_elems]
if __name__ == "__main__":
driver = webdriver.Chrome()
try:
login(driver)
followers = scrape_followers(driver, "instagram")
print(followers)
finally:
driver.quit()
This approach is problematic for a number of reasons, chief among them being how slow it is relative to the API.
Update: March 2020
This is just Levi's answer with small updates in some parts because, as it was, it didn't quit the driver successfully. This also gets all the followers by default; as everyone else has said, it's not intended for accounts with a lot of followers.
import itertools
from time import sleep

from explicit import waiter, XPATH
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

def login(driver):
    username = ""  # <username here>
    password = ""  # <password here>

    # Load page
    driver.get("https://www.instagram.com/accounts/login/")
    sleep(3)

    # Login
    driver.find_element_by_name("username").send_keys(username)
    driver.find_element_by_name("password").send_keys(password)
    submit = driver.find_element_by_tag_name('form')
    submit.submit()

    # Wait for the user dashboard page to load
    WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.LINK_TEXT, "See All")))

def scrape_followers(driver, account):
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the 'Follower(s)' link
    sleep(2)
    driver.find_element_by_partial_link_text("follower").click()

    # Wait for the followers modal to load
    waiter.find_element(driver, "//div[@role='dialog']", by=XPATH)
    allfoll = int(driver.find_element_by_xpath("//li[2]/a/span").text)

    # At this point a Followers modal pops open. If you immediately scroll to the bottom,
    # you hit a stopping point and a "See All Suggestions" link. If you fiddle with the
    # modal by scrolling up and down, you can force it to load additional followers for
    # that person.

    # Now the modal will begin loading followers every time you scroll to the bottom.
    # Keep scrolling in a loop until you've hit the desired number of followers.
    # In this instance, I'm using a generator to return followers one-by-one
    follower_css = "ul div li:nth-child({}) a.notranslate"  # Taking advantage of CSS's nth-child functionality
    for group in itertools.count(start=1, step=12):
        for follower_index in range(group, group + 12):
            if follower_index > allfoll:
                return  # 'raise StopIteration' inside a generator is an error in Python 3.7+ (PEP 479)
            yield waiter.find_element(driver, follower_css.format(follower_index)).text

        # Instagram loads followers 12 at a time. Find the last follower element
        # and scroll it into view, forcing instagram to load another 12
        # Even though we just found this elem in the previous for loop, there can
        # potentially be a large amount of time between that call and this one,
        # and the element might have gone stale. Let's just re-acquire it to avoid
        # that
        last_follower = waiter.find_element(driver, follower_css.format(group + 11))
        driver.execute_script("arguments[0].scrollIntoView();", last_follower)

if __name__ == "__main__":
    account = ""  # <account to check>
    driver = webdriver.Firefox(executable_path="./geckodriver")
    try:
        login(driver)
        print('Followers of the "{}" account'.format(account))
        for count, follower in enumerate(scrape_followers(driver, account=account), 1):
            print("\t{:>3}: {}".format(count, follower))
    finally:
        driver.quit()
I noticed that the previous answer no longer works, so I made an updated version based on it, which includes the scrolling feature (to get all the users in the list, not just those loaded initially). In addition, this scrapes both followers and following. (You'll need to download chromedriver as well.)
import time
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# The account you want to check
account = ""

# Chrome executable
chrome_binary = r"chrome.exe"  # Add your path here

def login(driver):
    username = ""  # Your username
    password = ""  # Your password

    # Load page
    driver.get("https://www.instagram.com/accounts/login/")

    # Login
    driver.find_element_by_xpath("//div/input[@name='username']").send_keys(username)
    driver.find_element_by_xpath("//div/input[@name='password']").send_keys(password)
    driver.find_element_by_xpath("//span/button").click()

    # Wait for the login page to load
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.LINK_TEXT, "See All")))

def scrape_followers(driver, account):
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the 'Follower(s)' link
    driver.find_element_by_partial_link_text("follower").click()

    # Wait for the followers modal to load
    xpath = "/html/body/div[4]/div/div/div[2]/div/div[2]"
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, xpath)))

    SCROLL_PAUSE = 0.5  # Pause to allow loading of content
    driver.execute_script("followersbox = document.getElementsByClassName('_gs38e')[0];")
    last_height = driver.execute_script("return followersbox.scrollHeight;")

    # We need to scroll the followers modal to ensure that all followers are loaded
    while True:
        driver.execute_script("followersbox.scrollTo(0, followersbox.scrollHeight);")

        # Wait for page to load
        time.sleep(SCROLL_PAUSE)

        # Calculate new scrollHeight and compare with the previous
        new_height = driver.execute_script("return followersbox.scrollHeight;")
        if new_height == last_height:
            break
        last_height = new_height

    # Finally, scrape the followers
    xpath = "/html/body/div[4]/div/div/div[2]/div/div[2]/ul/li"
    followers_elems = driver.find_elements_by_xpath(xpath)

    followers_temp = [e.text for e in followers_elems]  # List of followers (username, full name, follow text)
    followers = []  # List of followers (usernames only)

    # Go through each entry in the list, append the username to the followers list
    for i in followers_temp:
        username, sep, name = i.partition('\n')
        followers.append(username)

    print("______________________________________")
    print("FOLLOWERS")
    return followers

def scrape_following(driver, account):
    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the 'Following' link
    driver.find_element_by_partial_link_text("following").click()

    # Wait for the following modal to load
    xpath = "/html/body/div[4]/div/div/div[2]/div/div[2]"
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, xpath)))

    SCROLL_PAUSE = 0.5  # Pause to allow loading of content
    driver.execute_script("followingbox = document.getElementsByClassName('_gs38e')[0];")
    last_height = driver.execute_script("return followingbox.scrollHeight;")

    # We need to scroll the following modal to ensure that all following are loaded
    while True:
        driver.execute_script("followingbox.scrollTo(0, followingbox.scrollHeight);")

        # Wait for page to load
        time.sleep(SCROLL_PAUSE)

        # Calculate new scrollHeight and compare with the previous
        new_height = driver.execute_script("return followingbox.scrollHeight;")
        if new_height == last_height:
            break
        last_height = new_height

    # Finally, scrape the following
    xpath = "/html/body/div[4]/div/div/div[2]/div/div[2]/ul/li"
    following_elems = driver.find_elements_by_xpath(xpath)

    following_temp = [e.text for e in following_elems]  # List of following (username, full name, follow text)
    following = []  # List of following (usernames only)

    # Go through each entry in the list, append the username to the following list
    for i in following_temp:
        username, sep, name = i.partition('\n')
        following.append(username)

    print("\n______________________________________")
    print("FOLLOWING")
    return following

if __name__ == "__main__":
    options = wd.ChromeOptions()
    options.binary_location = chrome_binary  # chrome.exe
    driver_binary = r"chromedriver.exe"
    driver = wd.Chrome(driver_binary, chrome_options=options)
    try:
        login(driver)
        followers = scrape_followers(driver, account)
        print(followers)
        following = scrape_following(driver, account)
        print(following)
    finally:
        driver.quit()
