I wrote this code to get the list of followers of an Instagram profile using the instaloader library in Python:
login_name = 'beyondhelloworld'
target_profile = 'femindharamshi'
# OR
#import sys
#target_profile = sys.argv[1]  # pass in target profile as argument

from instaloader import Instaloader, Profile

loader = Instaloader()

# login
try:
    loader.load_session_from_file(login_name)
except FileNotFoundError:
    loader.context.log("Session file does not exist yet - Logging in.")
if not loader.context.is_logged_in:
    loader.interactive_login(login_name)
    loader.save_session_to_file()

profile = Profile.from_username(loader.context, target_profile)
followers = profile.get_followers()

loader.context.log()
loader.context.log('Profile {} has {} followers:'.format(profile.username, profile.followers))
loader.context.log()
for follower in followers:
    loader.context.log(follower.username, flush=True)
But I keep getting this error:
Loaded session from /Users/femindharamshi/.config/instaloader/session-beyondhelloworld.
Traceback (most recent call last):
File "/Users/femindharamshi/Documents/instaload/env/lib/python3.7/site-packages/instaloader/structures.py", line 597, in _obtain_metadata
self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
KeyError: 'graphql'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "il.py", line 20, in <module>
profile = Profile.from_username(loader.context, target_profile)
File "/Users/femindharamshi/Documents/instaload/env/lib/python3.7/site-packages/instaloader/structures.py", line 552, in from_username
profile._obtain_metadata() # to raise ProfileNotExistException now in case username is invalid
File "/Users/femindharamshi/Documents/instaload/env/lib/python3.7/site-packages/instaloader/structures.py", line 606, in _obtain_metadata
', '.join(similar_profiles[0:5]))) from err
instaloader.exceptions.ProfileNotExistsException: Profile femindharamshi does not exist.
The most similar profile is: femindharamshi.
How do I solve this issue?
The output says that the profile "femindharamshi" does not exist, but that is exactly my profile's username. It also says:
The most similar profile is: femindharamshi.
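For what it's worth, the underlying KeyError: 'graphql' usually means the installed instaloader version can no longer parse what Instagram returns for the profile page, so upgrading the package (pip install --upgrade instaloader) and retrying with a valid logged-in session is worth a try. A minimal sketch of that retry, reusing the loader, login_name and target_profile from the script above:
from instaloader import Profile
from instaloader.exceptions import ProfileNotExistsException, ConnectionException

try:
    profile = Profile.from_username(loader.context, target_profile)
except (ProfileNotExistsException, ConnectionException) as err:
    # Being logged in changes what Instagram serves, so confirm the session
    # is valid before concluding that the profile really does not exist.
    loader.context.log("Lookup failed ({}); re-checking the login state.".format(err))
    if not loader.context.is_logged_in:
        loader.interactive_login(login_name)
        loader.save_session_to_file()
    profile = Profile.from_username(loader.context, target_profile)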
import instaloader
import random
import os

dir_path_driver = os.getcwd()

def username_password():
    # username.txt holds one "user:password" pair per line
    listusername = []
    with open("./username.txt", "r") as usernames:
        for username in usernames:
            listusername.append(username.rstrip("\n").split(":"))
    if len(listusername) == 1:
        select = 0
    else:
        # randint's upper bound is inclusive, so use len - 1
        select = random.randint(0, len(listusername) - 1)
    return listusername[select][0], listusername[select][1]

def get_followers():
    L = instaloader.Instaloader()
    # Login or load session
    username, password = username_password()
    listfile = os.listdir(dir_path_driver + "/cookie")
    if username in listfile:
        # reuse the saved session for this account
        L.load_session_from_file(filename=dir_path_driver + "/cookie/" + username, username=username)
    else:
        L.login(username, password)
        L.save_session_to_file(filename=dir_path_driver + "/cookie/" + username)
    file = open("prada_followers.txt", "a+")
    profile = instaloader.Profile.from_username(L.context, "idinstagram")
    for followee in profile.get_followers():
        file.write(followee.username + "\n")
    file.close()
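For this snippet I'm assuming (based on the split(":") and the path handling above) a username.txt next to the script with one user:password pair per line, plus a cookie/ directory for the saved sessions, and that get_followers() is actually called at the end. A sketch of that setup:
# Assumed layout (not part of the original post):
#   username.txt  -> one "user:password" pair per line
#   cookie/       -> directory where instaloader sessions get saved
import os

os.makedirs("cookie", exist_ok=True)
with open("username.txt", "w") as f:
    f.write("my_account:my_password\n")   # placeholder credentials

get_followers()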
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep

class InstaBot:
    """InstaBot can log in and can return the users that don't
    follow you back.
    The object requires two args:
    'username' & 'password'."""

    def __init__(self, username, pw):
        self.username = username
        self.pw = pw
        self.driver = webdriver.Chrome(executable_path='chromedriver.exe')
        self.base_url = "https://instagram.com"
        self.driver.get("{}".format(self.base_url))
        sleep(2)
        self.driver.maximize_window()
        self.login()

    def login(self):
        self.driver.find_element_by_xpath("//input[@name=\"username\"]")\
            .send_keys(self.username)
        self.driver.find_element_by_xpath("//input[@name=\"password\"]")\
            .send_keys(self.pw)
        self.driver.find_element_by_xpath("//button[@type=\"submit\"]")\
            .click()
        sleep(10)
        self.driver.find_element_by_xpath("//button[contains(text(), 'Not Now')]")\
            .click()
        sleep(2)

    def get_unfollowers(self):
        self.driver.find_element_by_xpath("//a[contains(@href, '/{}')]".format(self.username))\
            .click()
        sleep(3)
        self.driver.find_element_by_xpath("//a[contains(@href, '/following')]")\
            .click()
        sleep(2)
        following = self._get_names()
        self.driver.find_element_by_xpath("//a[contains(@href, '/followers')]")\
            .click()
        sleep(2)
        followers = self._get_names()
        not_following_back = [user for user in following if user not in followers]
        return not_following_back
##        suggestions = self.driver.find_element_by_xpath('//h4[contains(text(), "Suggestions")]')
##        self.driver.execute_script('arguments[0].scrollIntoView()', suggestions)

    def _get_names(self):
        scroll_box = self.driver.find_element_by_xpath("/html/body/div[4]/div/div[2]")
        last_ht, ht = 0, 1
        while last_ht != ht:
            last_ht = ht
            sleep(1)
            ht = self.driver.execute_script("""
                arguments[0].scrollTo(0, arguments[0].scrollHeight);
                return arguments[0].scrollHeight;
                """, scroll_box)
        links = scroll_box.find_elements_by_tag_name('a')
        names = [name.text for name in links if name.text != '']
        sleep(2)
        self.driver.find_element_by_xpath("/html/body/div[4]/div/div[1]/div/div[2]/button")\
            .click()
        return names

    def navigate_to_user(self, user):
        self.driver.get("{}/{}".format(self.base_url, user))

    def scroll_down(self):
        last_height = self.driver.execute_script("return document.body.scrollHeight")
        while True:
            self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            sleep(2)
            new_height = self.driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

my_bot = InstaBot("your_username", "your_password")  # replace with real credentials
##unfollowers = my_bot.get_unfollowers()  # will return a list
my_bot.navigate_to_user("any_username_you_follow")  # opens that user's profile page
The bot enters the profile and accesses the post, but when it tries to pull the comments from the txt file it closes without making any comments.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from time import sleep
import instaloader
import random
import os.path

driver = webdriver.Chrome(r"C:\Users\Windows\Downloads\chromedriver.exe")

def sorteioInstagram():
    abrindo_instagram("inspiracaobiel", "biel123", "https://www.instagram.com/p/CoI52RJOqnT/")
    if os.path.isfile('seguidores.txt'):
        print("Followers file already loaded...")
    else:
        pegar_seguidores('bielbibiel')
    comentandoPost()

def abrindo_instagram(username, password, url):
    print("Opening Instagram...")
    driver.get("https://www.instagram.com/")
    sleep(2)
    print("Logging in to Instagram...")
    driver.find_element(By.XPATH, '//input[@name="username"]').send_keys(username)
    driver.find_element(By.XPATH, '//input[@name="password"]').send_keys(password)
    driver.find_element(By.XPATH, '//button[@type="submit"]').click()
    sleep(10)
    print("Dismissing Instagram security prompts...")
    driver.find_element(By.XPATH, "//button[text()='Agora não']").click()  # "Not now" button (Portuguese UI)
    sleep(5)
    driver.find_element(By.XPATH, "//button[text()='Agora não']").click()
    sleep(4)
    print("Opening the giveaway post...")
    driver.get(url)

def pegar_seguidores(usuario):
    L = instaloader.Instaloader()
    L.login('inspiracaobiel', 'biel123')
    profile = instaloader.Profile.from_username(L.context, usuario)
    print(f"Saving followers of {usuario}...")
    # Save the followers to a .txt file
    file = open("seguidores.txt", "a+")
    for followee in profile.get_followers():
        username = "@" + followee.username
        file.write(username + "\n")
    file.close()

def comentandoPost():
    z = 0
    while True:
        cmt = driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div[1]/article/div[3]/section[3]/div/form/textarea')
        cmt.click()
        comment = lendo_arquivo()
        driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div[1]/article/div[3]/section[3]/div/form/textarea').send_keys(comment)
        driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div[1]/article/div[3]/section[3]/div/form/textarea').send_keys(' ')
        sleep(10)
        driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div[1]/article/div[3]/section[3]/div/form/textarea').send_keys(Keys.ENTER)
        sleep(10)
        driver.find_element(By.XPATH, '//*[@id="react-root"]/section/main/div/div[1]/article/div[3]/section[3]/div/form/textarea').send_keys(Keys.ENTER)
        z += 1
        print(f"{z}")
        sleep(60)

def lendo_arquivo():
    with open("seguidores.txt", "r") as file:
        allText = file.read()
        words = list(map(str, allText.split()))
        return random.choice(words)

sorteioInstagram()
ERROR:
Traceback (most recent call last):
File "c:\Users\gabri\Downloads\Codigo Comentarios\venv\Scripts\main.py", line 76, in <module>
sorteioInstagram()
File "c:\Users\gabri\Downloads\Codigo Comentarios\venv\Scripts\main.py", line 17, in sorteioInstagram
pegar_seguidores('bielbibiel')
File "c:\Users\gabri\Downloads\Codigo Comentarios\venv\Scripts\main.py", line 41, in pegar_seguidores
L.login('inspiracaobiel', 'biel123')
File "C:\Users\gabri\AppData\Local\Programs\Python\Python311\Lib\site-packages\instaloader\instaloader.py", line 634, in login
self.context.login(user, passwd)
File "C:\Users\gabri\AppData\Local\Programs\Python\Python311\Lib\site-packages\instaloader\instaloadercontext.py", line 239, in login
raise ConnectionException("Login: Checkpoint required. Point your browser to "
instaloader.exceptions.ConnectionException: Login: Checkpoint required. Point your browser to https://www.instagram.com/challenge/action/AXG6u8rHRprRccgzcgLKpQNJzv0wH2s1vajDDBvJi2xj-ypRREOtDgZK3e5Ee8HekhTocA0/AfxgQISXQXjHI8kasJEBP020fZ1GKKqFXXJGeaFzdDei1KKj2Pc3OVuZl5K_J3Og2Mxa0Yx64gubOg/ffc_XxXKP7ukaNEpowIhzsecXIF8lbX5oShdPs03HikkaCikTbJ50ZWn38x98bzTC5ZI/ - follow the instructions, then retry.
I tried to fix the commenting part, but I couldn't resolve the error; the script keeps closing without making any comments.
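For the "Checkpoint required" part, one approach that often helps (a sketch, not a guaranteed fix): complete the challenge once at the URL from the error in a normal browser, then reuse a saved instaloader session instead of calling L.login() on every run, so repeated logins don't keep re-triggering the checkpoint. Something along these lines, reusing the account from the script above:
import instaloader

L = instaloader.Instaloader()
try:
    # Reuse cookies saved by a previous successful login
    # (e.g. created once with:  instaloader --login inspiracaobiel )
    L.load_session_from_file('inspiracaobiel')
except FileNotFoundError:
    # First run: prompts for the password and stores the session on disk
    L.interactive_login('inspiracaobiel')
    L.save_session_to_file()

profile = instaloader.Profile.from_username(L.context, 'bielbibiel')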
I am trying to take an Instagram account with over 1,000,000 followers and add the followers' usernames to a txt file. I am using Selenium for this, but my authorization fails every time I log in. Any advice on how to get around this? I assume the site believes this is a hack, but I'm not sure.
from selenium import webdriver as web
from selenium.webdriver.common.keys import Keys
import time
import random
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options

bot_username = 'null'
bot_password = 'Null'
profiles = ['Enter Here']
amount = 300
# 'usernames' or 'links'
result = 'usernames'
us = ''

class Instagram():
    def __init__(self, username, password):
        self.username = username
        self.password = password
        options = Options()
        options.add_experimental_option("excludeSwitches", ["enable-logging"])
        self.browser = web.Chrome("chromedriver", options=options)
        self.browser.set_window_size(400, 900)

    def close_browser(self):
        self.browser.close()
        self.browser.quit()

    def login(self):
        browser = self.browser
        try:
            browser.get('https://www.instagram.com')
            time.sleep(random.randrange(3, 5))
            # Enter username:
            username_input = browser.find_element_by_name('username')
            username_input.clear()
            username_input.send_keys(self.username)
            time.sleep(random.randrange(2, 4))
            # Enter password:
            password_input = browser.find_element_by_name('password')
            password_input.clear()
            password_input.send_keys(self.password)
            time.sleep(random.randrange(1, 2))
            password_input.send_keys(Keys.ENTER)
            time.sleep(random.randrange(3, 5))
            print(f'[{self.username}] Successfully logged on!')
        except Exception as ex:
            print(f'[{self.username}] Authorization fail')
            self.close_browser()

    def xpath_exists(self, url):
        browser = self.browser
        try:
            browser.find_element_by_xpath(url)
            exist = True
        except NoSuchElementException:
            exist = False
        return exist

    def get_followers(self, users, amount):
        browser = self.browser
        followers_list = []
        for user in users:
            browser.get('https://instagram.com/' + user)
            time.sleep(random.randrange(3, 5))
            followers_button = browser.find_element_by_xpath('/html/body/div[1]/section/main/div/ul/li[2]/a/span')
            count = followers_button.get_attribute('title')
            if ',' in count:
                count = int(''.join(count.split(',')))
            else:
                count = int(count)
            if amount > count:
                print(f'You set amount = {amount} but there are {count} followers, then amount = {count}')
                amount = count
            followers_button.click()
            loops_count = int(amount / 12)
            print(f'Scraping. Total: {amount} usernames. Wait {loops_count} iterations')
            time.sleep(random.randrange(8, 10))
            followers_ul = browser.find_element_by_xpath("/html/body/div[6]/div/div/div[2]")
            time.sleep(random.randrange(5, 7))
            try:
                for i in range(1, loops_count + 1):
                    browser.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", followers_ul)
                    time.sleep(random.randrange(8, 10))
                all_div = followers_ul.find_elements_by_tag_name("li")
                for us in all_div:
                    us = us.find_element_by_tag_name("a").get_attribute("href")
                    if result == 'usernames':
                        us1 = us.replace("https://www.instagram.com/", "")
                        us = us1.replace("/", "")
                    followers_list.append(us)
                    time.sleep(1)
                # write what has been collected so far
                with open('userlist.txt', 'w') as f3:
                    for name in followers_list:
                        f3.write(name + '\n')
                print(f'Got: {len(followers_list)} usernames of {amount}. Saved to file.')
                time.sleep(random.randrange(3, 5))
            except Exception as ex:
                print(ex)
        self.close_browser()
        return followers_list

bot = Instagram(bot_username, bot_password)
bot.login()
followers = bot.get_followers(profiles, amount)
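One adjustment that sometimes helps with the "Authorization fail" path (a sketch under the assumption that the username/password field names used above are still what instagram.com serves): wait for the login form explicitly instead of relying only on fixed sleeps, so a slow page load is not reported as a failed login. The '/direct/' link used as a "logged in" marker is my assumption, not something from the original code:
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def login_with_waits(browser, username, password):
    browser.get('https://www.instagram.com')
    wait = WebDriverWait(browser, 20)
    # Wait for the form instead of guessing how long the page takes to load.
    user_input = wait.until(EC.presence_of_element_located((By.NAME, 'username')))
    user_input.clear()
    user_input.send_keys(username)
    pass_input = browser.find_element(By.NAME, 'password')
    pass_input.clear()
    pass_input.send_keys(password + Keys.ENTER)
    # Treat the login as successful only once an element of the logged-in UI
    # shows up (the '/direct/' inbox link is an assumption).
    wait.until(EC.presence_of_element_located((By.XPATH, "//a[contains(@href, '/direct/')]")))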
I wrote a script to save LinkedIn information such as name, last name, university, and, most importantly, the link to each LinkedIn profile. My script uses Selenium and chromedriver to log in to LinkedIn and then scrape. My problem is with saving the profile links: they aren't being scraped properly. Here's my code:
import csv
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.keys import Keys
import parameters
import re

class LinkedIn():
    def __init__(self):
        self.driver = webdriver.Chrome()
        self.people_ls_dic = []
        self.csv_name_colums = ["name", "degree_connection", "zawod", "region", "opis", "firma", "link"]

    def login(self):
        self.driver.get("http://www.linkedin.com/login")
        sleep(3)
        username = self.driver.find_element_by_name('session_key')
        username.send_keys(parameters.linkedin_username)
        password = self.driver.find_element_by_name('session_password')
        password.send_keys(parameters.linkedin_password)
        sign_in_button = self.driver.find_elements_by_xpath('//*[@class="btn__primary--large from__button--floating mercado-button--primary"]')
        sign_in_button[0].click()
        sleep(5)

    def neville_try(self):
        sleep(3)
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        profiles = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[2]/div/div/div/div[2]/ul')
        profiles = profiles.find_elements_by_css_selector('li')
        profiles = [(i.text, i.find_element_by_xpath('//*[@data-control-name="entity_result"]').get_attribute('href')) for i in profiles]
        print("\n\n")
        info_ls = []
        for profile, link in profiles:
            info_ls.append((profile.split('\n'), link))
        for iteam, link in info_ls:
            if 'Learn more' in iteam:
                info_ls.remove(iteam)
        print(info_ls)
        info_ls = [(iteam, link) for iteam, link in info_ls if iteam != ['']]
        for info, link in info_ls:
            if info[0] == info[1]:
                info.remove(info[1])
            try:
                name = info[0]
                degree_connection = info[2]
                zawod = info[3]
                region = info[4]
                opis = info[5]
                opis_f = opis.replace(",", " ")
                list_of_user_data = [name, zawod, opis_f]
                for data in list_of_user_data:
                    try:
                        comp = re.findall('at ([a-zA-Z0-9]+)', data)
                        firma = comp[0]
                        break
                    except:
                        continue
                if comp == []:
                    firma = "brak_danych"
                self.people_ls_dic.append({"name": name, "degree_connection": degree_connection, "zawod": zawod, "region": region, "opis": opis, "firma": firma, "link": link})
            except:
                pass

    def go_home(self):
        home = self.driver.find_element_by_xpath('//*[@id="inbug-nav-item"]/a')
        home.click()

    def next_page(self):
        sleep(3)
        next_p = self.driver.find_element_by_xpath('//*[@aria-label="Next"]')
        next_p.click()

    def open_people(self):
        self.driver.get("https://www.linkedin.com/search/results/people/?origin=DISCOVER_FROM_SEARCH_HOME")
        sleep(2)
        search_bar = self.driver.find_element_by_xpath('//*[@class="search-global-typeahead__input always-show-placeholder"]')
        search_bar.send_keys(parameters.search_query)
        search_bar.send_keys(Keys.ENTER)
        sleep(3)

    def filter_company(self):
        cl = self.driver.find_element_by_xpath('//*[@aria-label="Current company filter. Clicking this button displays all Current company filter options."]')
        cl.click()
        for comp in parameters.list_of_comp:
            text = self.driver.find_element_by_xpath('//*[@placeholder="Add a company"]')
            text.send_keys(comp)
            sleep(1)
            filt = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]/nav/div/div[1]/div/div[2]/ul/li[5]/div/div/div/div[1]/div/form/fieldset/div[1]/div/div/div[2]/div/div[2]')
            sleep(0.2)
            filt.click()
            sleep(1)
        apply = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]/nav/div/div[1]/div/div[2]/ul/li[5]/div/div/div/div[1]/div/form/fieldset/div[2]/button[2]')
        apply.click()
        sleep(1)

    def close(self):
        self.driver.close()

    def write_to_csv(self):
        csv_file = "neville.csv"
        with open(csv_file, 'w', encoding="utf-8", newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=self.csv_name_colums)
            writer.writeheader()
            for data in self.people_ls_dic:
                writer.writerow(data)

scrypt = LinkedIn()
scrypt.login()
scrypt.open_people()
ls = range(parameters.ilosc_stron)
scrypt.filter_company()
for i in sorted(ls, reverse=True):
    scrypt.neville_try()
    if i == 1:
        break
    scrypt.next_page()
scrypt.write_to_csv()
scrypt.close()
Of course, I have a parameters file and it looks like this:
linkedin_username = ""
linkedin_password = ""
search_query = 'vcloud director'
list_of_comp = ['Microsoft']
ilosc_stron = 2  # number of pages to click through
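Regarding the links not being scraped properly: the likely culprit is the XPath in neville_try. An XPath starting with // inside element.find_element_by_xpath() searches the whole document, not the element, so every li yields the first entity_result link on the page. A sketch of the per-item lookup with a relative path (keeping the data-control-name attribute from the original code):
def collect_profile_links(driver):
    """Return one (text, href) pair per search-result item."""
    container = driver.find_element_by_xpath(
        '/html/body/div[7]/div[3]/div/div[2]/div/div/div/div[2]/ul')
    items = container.find_elements_by_css_selector('li')
    # The leading "." scopes the XPath to each item; without it, every item
    # returns the first matching link in the whole document.
    return [
        (i.text,
         i.find_element_by_xpath('.//*[@data-control-name="entity_result"]').get_attribute('href'))
        for i in items
    ]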
This is the code:
#A simple Kahoot bot that joins a Kahoot game and sits idle
#Version 1.4.4
#ENTech SS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import random
#Asking for info here:
print("Kahoot spammer v 1.4.4")
pin = input("Please enter a game pin:")
name = input("Please enter a name:")
join = input("Please enter the amount of bots to join (default is 50):")
tab = 0
nameb = str(name)
bot_num = 0
#Start chrome
print("Starting chrome...")
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(executable_path="/Users/adamkatborg/Desktop/chromedriver-2", options="options")
#driver = webdriver.Chrome(chrome_options=chrome_options)
#If the join field is blank, the default is 50
if join == '':
    join = 50

def namec():
    #Code for clarifying the name
    global join, bot_num, nameb
    num = random.randint(1, 999) * 3
    if join == '1':
        nameb = name
        bot_num = bot_num + 1
    if int(join) >= 2:
        if bot_num == join:
            print("Name generation completed")
        nameb = (name + '.' + str(num))
        bot_num = bot_num + 1

def bot():
    global nameb, driver, tab
    if bot_num == 1:
        print("No new window necessary")
    elif bot_num >= 2:
        print("Opening new window...")
        driver.execute_script("window.open('');")
        driver.switch_to.window(driver.window_handles[tab])
    print("Navigating to Kahoot...")
    #Navigate to kahoot.it
    driver.get("https://kahoot.it/")
    #Wait until the element is available
    wait = WebDriverWait(driver, 10)
    element = wait.until(EC.element_to_be_clickable((By.ID, 'inputSession')))
    #Finding input box
    inputb = driver.find_element_by_id('inputSession')
    print("Joining game...")
    #Inputting pin
    inputb.send_keys(pin)
    inputb.submit()
    #Entering name
    element = wait.until(EC.element_to_be_clickable((By.ID, 'username')))
    gname = driver.find_element_by_id('username')
    namec()
    gname.send_keys(nameb)
    gname.submit()
    #Checking login
    print("Checking if login was successful...")
    try:
        content = driver.find_element_by_class_name('ng-binding')
    except:
        print("Error checking page:\nId could have changed, or connection could have dropped.")
        x = input("Press any key to exit...")
    print("Success!")
    print("Bot [" + str(bot_num) + "] is now in the game ;)")
    tab = tab + 1

#Code for running a set amount of times
for x in range(int(join)):
    bot()
And this is the error I am getting:
Traceback (most recent call last):
File "/Users/adamkatborg/Desktop/kbot-master/testbot.py", line 23, in <module>
driver = webdriver.Chrome(executable_path="/Users/adamkatborg/Desktop/chromedriver-2", options="options")
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/selenium/webdriver/chrome/webdriver.py", line 64, in __init__
desired_capabilities = options.to_capabilities()
AttributeError: 'str' object has no attribute 'to_capabilities'
Can someone please help me?
On line 23 you are passing the string "options" as the options parameter, but it is expecting an Options object, not a string.
What you probably meant to do was pass the chrome_options object. The line should look like this:
driver = webdriver.Chrome(executable_path="/Users/adamkatborg/Desktop/chromedriver-2", options=chrome_options)
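For completeness, keeping the headless Options object that is already built a few lines earlier (the chromedriver path is the asker's own), the surrounding lines would then be:
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(executable_path="/Users/adamkatborg/Desktop/chromedriver-2",
                          options=chrome_options)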
I am new to Python and need help with web scraping code to save a dynamic map every week.
The site I am interested in is the CDC FluView page (https://gis.cdc.gov/grasp/fluview/main.html, used as BASE_URL below).
The purpose is to get to the page, select the season, select the week, and download the image to a local folder. I'll use the image in an automated weekly report built with SAS.
Thank you in advance!
import sys
import os
import time
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium import webdriver
import arrow

BASE_URL = 'https://gis.cdc.gov/grasp/fluview/main.html'
DOWNLOAD_PATH = "/Users/"

def closeWebDriver(driver):
    if os.name == 'nt':
        driver.quit()
    else:
        driver.close()

def getImage():
    profile = FirefoxProfile()
    profile.set_preference("browser.download.panel.shown", False)
    profile.set_preference("browser.helperApps.neverAsk.openFile", "image/png")
    profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "image/png")
    profile.set_preference("browser.download.folderList", 2)
    profile.set_preference("browser.download.dir", DOWNLOAD_PATH)
    driver = webdriver.Firefox(firefox_profile=profile)
    driver.get(BASE_URL)
    time.sleep(5)
    if not isValidTimeFrame(driver):
        print('Not the time to download yet!')
        closeWebDriver(driver)
        return
    selectFirstWeek(driver)
    print('- Consume the web.')
    wrapper = driver.find_element_by_class_name('downloads-help-area')
    download_img_els = wrapper.find_elements_by_class_name('downloads-button')
    for el in download_img_els:
        # In Python 3, el.text is already a str, so compare it directly.
        text = el.text
        # print(text)
        if 'download image' == text.strip().lower():
            # Process
            downloadImage(el)
            break
    time.sleep(5)
    closeWebDriver(driver)

def isValidTimeFrame(driver):
    seasons_button = driver.find_element_by_class_name('seasons-button')
    time_frame = seasons_button.text.strip().lower()
    current_year = arrow.now().to('local')
    current_year_str = current_year.format('YYYY')
    next_year = current_year.shift(years=1)
    next_year_str = next_year.format('YY')
    print(time_frame)
    compare_year = '%s-%s' % (current_year_str, next_year_str)
    return time_frame == compare_year

def selectFirstWeek(driver):
    prev = driver.find_element_by_id('prevMap')
    week = driver.find_element_by_id('weekSlider')
    while True:
        print(week)
        current_number = week.get_property('value')
        print('- Week: ' + current_number)
        prev.click()
        if int(current_number) < 2:
            break
        time.sleep(1)

def downloadImage(el):
    print('- Click on ' + el.text)
    el.click()

getImage()