web scraping with python, with navigation controller

web scraping with python, with navigation controller - python

I am new to python and I need help with web scraping code to save a dynamic map every week.
This is the site I am interested in.
The purpose is to get to the page, select season, select week, and download image to a local folder. I'll use the image to integrate for an automated weekly report using SAS.
thank you in advance!

import sys
import os
import time
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium import webdriver
import arrow
BASE_URL = 'https://gis.cdc.gov/grasp/fluview/main.html'
DOWNLOAD_PATH = "/Users/"
def closeWebDriver(driver):
if os.name == 'nt':
driver.quit()
else:
driver.close()
def getImage():
profile = FirefoxProfile()
profile.set_preference("browser.download.panel.shown", False)
profile.set_preference("browser.helperApps.neverAsk.openFile","image/png")
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "image/png")
profile.set_preference("browser.download.folderList", 2);
profile.set_preference("browser.download.dir", DOWNLOAD_PATH)
driver = webdriver.Firefox(firefox_profile=profile)
driver.get(BASE_URL)
time.sleep(5)
if not isValidTimeFrame(driver):
print('Not the time to download yet!')
closeWebDriver(driver)
return
selectFirstWeek(driver)
print('- Consume the web.')
wrapper = driver.find_element_by_class_name('downloads-help-area')
download_img_els = wrapper.find_elements_by_class_name('downloads-button')
for el in download_img_els:
text = el.text.encode('utf-8')
# print(text)
if 'download image' == text.strip().lower():
# Process
downloadImage(el)
break
time.sleep(5)
closeWebDriver(driver)
def isValidTimeFrame(driver):
seasons_button = driver.find_element_by_class_name('seasons-button')
time_frame = seasons_button.text.encode('utf-8').strip().lower()
current_year = arrow.now().to('local')
current_year_str = current_year.format('YYYY')
next_year = current_year.shift(years=1)
next_year_str = next_year.format('YY')
print(time_frame)
compare_year = '%s-%s' % (current_year_str, next_year_str)
return time_frame == compare_year
def selectFirstWeek(driver):
prev = driver.find_element_by_id('prevMap')
week = driver.find_element_by_id('weekSlider')
while True:
print(week)
current_number = week.get_property('value')
print('- Week: ' + current_number)
prev.click()
if int(current_number) < 2:
break;
time.sleep(1)
def downloadImage(el):
print('- Click on ' + el.text)
el.click()
getImage()

Related

Using Selenium to get a list of Instagram followers

I am trying to take an account of over 1,000,000 followers on instagram and add their usernames to a txt file. I am trying to use Selenium for this but my authorization for login fails every time i login. Any advice on how to get around this? I assume the site believes this is a hack but im not sure.
from selenium import webdriver as web
from selenium.webdriver.common.keys import Keys
import time
import random
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
bot_username = 'null'
bot_password = 'Null'
profiles = ['Enter Here']
amount = 300
# 'usernames' or 'links'
result = 'usernames'
us = ''
class Instagram():
def __init__(self, username, password):
self.username = username
self.password = password
options = Options()
options.add_experimental_option("excludeSwitches", ["enable-logging"])
self.browser = web.Chrome("chromedriver",options=options)
self.browser.set_window_size(400, 900)
def close_browser(self):
self.browser.close()
self.browser.quit()
def login(self):
browser = self.browser
try:
browser.get('https://www.instagram.com')
time.sleep(random.randrange(3, 5))
# Enter username:
username_input = browser.find_element_by_name('username')
username_input.clear()
username_input.send_keys(self.username)
time.sleep(random.randrange(2, 4))
# Enter password:
password_input = browser.find_element_by_name('password')
password_input.clear()
password_input.send_keys(self.password)
time.sleep(random.randrange(1, 2))
password_input.send_keys(Keys.ENTER)
time.sleep(random.randrange(3, 5))
print(f'[{self.username}] Successfully logged on!')
except Exception as ex:
print(f'[{self.username}] Authorization fail')
self.close_browser()
def xpath_exists(self, url):
browser = self.browser
try:
browser.find_element_by_xpath(url)
exist = True
except NoSuchElementException:
exist = False
return exist
def get_followers(self, users, amount):
browser = self.browser
followers_list = []
for user in users:
browser.get('https://instagram.com/' + user)
time.sleep(random.randrange(3, 5))
followers_button = browser.find_element_by_xpath('/html/body/div[1]/section/main/div/ul/li[2]/a/span')
count = followers_button.get_attribute('title')
if ',' in count:
count = int(''.join(count.split(',')))
else:
count = int(count)
if amount > count:
print(f'You set amount = {amount} but there are {count} followers, then amount = {count}')
amount = count
followers_button.click()
loops_count = int(amount / 12)
print(f'Scraping. Total: {amount} usernames. Wait {loops_count} iterations')
time.sleep(random.randrange(8,10))
followers_ul = browser.find_element_by_xpath("/html/body/div[6]/div/div/div[2]")
time.sleep(random.randrange(5,7))
try:
for i in range(1, loops_count + 1):
browser.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", followers_ul)
time.sleep(random.randrange(8, 10))
all_div = followers_ul.find_elements_by_tag_name("li")
for us in all_div:
us = us.find_element_by_tag_name("a").get_attribute("href")
if result == 'usernames':
us1 = us.replace("https://www.instagram.com/", "")
us = us1.replace("/", "")
followers_list.append(us)
time.sleep(1)
f3 = open('userlist.txt', 'w')
for list in followers_list:
f3.write(list + '\n')
print(f'Got: {len(followers_list)} usernames of {amount}. Saved to file.')
time.sleep(random.randrange(3, 5))
except Exception as ex:
print(ex)
self.close_browser()
return followers_list
bot = Instagram(bot_username, bot_password)
bot.login()
followers = bot.get_followers(profiles, amount)

Whenever I look for an element in a page, even if it exists, it is still coming up as nonexistent

So I just started coding with Selenium and I made this program that goes onto the website JKLM.fun and it plays the game. Lately, I've been trying to type in the chat but I keep getting this error:
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element
This is the code I am running:
chat = Driver.find_element(by=By.TAG_NAME, value="textarea")
And this is what I am trying to access:
And before you say to use XPATH or CSS selector or access the DIV above, none of those worked. If you need all my code I'll just put it below this. Can someone please please help me? I have been stuck on this forever!
import random
import time
import re
import keyboard
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
#Variables
Code = "JFEV"
Username = "Glitch BOT"
legitMode = False
totalLegitMode = False
lesslegitmode = False
botmode = False
Word = ""
Driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
usedWords = []
joinedGame = False
invalid = open("keyboardTest/invalid.txt", "a")
#Functions
def findWord(Prompt):
notvalid = open("keyboardTest/invalid.txt").read().split('\n')
global usedWords
Words = open("keyboardTest/bigtosmall.txt").read().split("\n")
bestword = ""
for o in range(len(Words)):
if Prompt.lower() in Words[o] and Words[o] not in usedWords and Words[o] not in notvalid:
bestword = Words[o]
usedWords.append(bestword)
break
# while not (Prompt.lower() in Word):
# Word = (random.choice(Words))
# if Prompt.lower() in Word and not (Word in usedWords):
# usedWords.append(Word)
# break
if bestword == "":
print('No Word Found For:', Prompt)
return bestword
def joinServer():
global Code
if Code == "":
Driver.get("https://jklm.fun")
while Driver.current_url == "https://jklm.fun/":
pass
return
else:
Driver.get(f"https://jklm.fun/{Code}")
return
def joinGame():
global joinedGame
while joinedGame == False:
try:
joinBox = Driver.find_element(by=By.CLASS_NAME, value="join")
joinButton = joinBox.find_element(by=By.TAG_NAME, value="button")
ActionChains(Driver).move_to_element(joinButton).click(joinButton).perform()
joinedGame = True
except:
pass
#Code
joinServer()
OK = Driver.find_element(by=By.CLASS_NAME, value="line")
Driver.implicitly_wait(5)
usernameBox = OK.find_element(by=By.TAG_NAME, value="input")
Driver.implicitly_wait(5)
while True:
if usernameBox.get_attribute("value") != "":
time.sleep(0.1)
usernameBox.send_keys(Keys.BACK_SPACE)
else:
usernameBox.send_keys(Username)
usernameBox.send_keys(Keys.RETURN)
break
iFrame = Driver.find_element(by=By.TAG_NAME, value="iframe")
Driver.switch_to.frame(iFrame)
joinGame()
print('JOINED THE GAME')
time.sleep(2)
print('AFTER DELAY')
try:
chat = Driver.find_element(by=By.TAG_NAME, value="textarea")
chat.clear()
except:
print('DID NOT WORK!!!! LLLL')
print("DEFINED CHAT!")
chat.send_keys('Testing')
print("SAID TESTING!")
chat.send_keys(Keys.RETURN)
print("PRINTED IT OUT!")
while joinedGame == True:
try:
#time.sleep(0.3)
joinBox = Driver.find_element(by=By.CLASS_NAME, value="join")
if not joinBox.is_displayed():
Player = Driver.find_element(by=By.CLASS_NAME, value="player")
selfTurn = Driver.find_element(by=By.CLASS_NAME, value="selfTurn")
if Player.text == "" and selfTurn.is_displayed():
Input = selfTurn.find_element(by=By.CLASS_NAME, value="styled")
Prompt = Driver.find_element(by=By.CLASS_NAME, value="syllable").text
print("Current Prompt is:",Prompt)
guess = findWord(Prompt)
print("The guess for that prompt is:", guess)
if legitMode:
time.sleep(random.uniform(0.3,0.8))
for i in range(len(guess)):
time.sleep(random.uniform(0.01,.12))
Input.send_keys(guess[i])
elif totalLegitMode:
time.sleep(random.uniform(0.2,1))
for i in range(len(guess)):
time.sleep(random.uniform(0.05,.14))
Input.send_keys(guess[i])
elif lesslegitmode:
time.sleep(random.uniform(0.1,.6))
for i in range(len(guess)):
time.sleep(random.uniform(0.02,.11))
Input.send_keys(guess[i])
else:
Input.send_keys(guess)
Input.send_keys(Keys.RETURN)
usedWords.append(guess)
print("just used word:", guess)
if not botmode:
time.sleep(.2)
if selfTurn.is_displayed() and Driver.find_element(by=By.CLASS_NAME, value="syllable").text == Prompt:
# a = open("keyboardTest/invalid.txt").read().split('\n')
# if guess not in a: #if its not already in the list
invalid.write('\n')
invalid.write(guess) #if word didn't work, put it into invalid list then ill manually check it
invalid.close()
invalid = open("keyboardTest/invalid.txt", "a")
# guess = findWord(Prompt)
# print(guess)
else:
usedWords = []
joinButton = joinBox.find_element(by=By.TAG_NAME, value="button")
ActionChains(Driver).move_to_element(joinButton).click(joinButton).perform()
except Exception as e:
pass

I figured it out. All you have to do is switch to parent frame before accessing chatbox. That is where it is located. It isn't located in the iframe.
Driver.switch_to.parent_frame() #check for commands
And you're good to go! (Don't forget to switch back to Iframe when done accessing chatbox)

Python Selenium Script:

I wrote a script to save LinkedIn information like: name, last name, graduated university and most important link to LinkedIn script. My script is using Selenium and chromedriver to enter LinkedIn and then scrape. My problem is with saving profile links. Links aren't scraping properly. Here's my code:
import csv
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.keys import Keys
import parameters
import re
class LinkedIn():
def __init__(self):
self.driver = webdriver.Chrome()
self.people_ls_dic = []
self.csv_name_colums = ["name","degree_connection","zawod","region","opis","firma","link"]
def login(self):
self.driver.get("http://www.linkedin.com/login")
sleep(3)
username = self.driver.find_element_by_name('session_key')
username.send_keys(parameters.linkedin_username)
password = self.driver.find_element_by_name('session_password')
password.send_keys(parameters.linkedin_password)
sign_in_button = self.driver.find_elements_by_xpath('//*[#class="btn__primary--large from__button--floating mercado-button--primary"]')
sign_in_button[0].click()
sleep(5)
def neville_try(self):
sleep(3)
self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
profiles = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[2]/div/div/div/div[2]/ul')
profiles = profiles.find_elements_by_css_selector('li')
profiles = [(i.text, i.find_element_by_xpath('//*[#data-control-name="entity_result"]').get_attribute('href')) for i in profiles]
print("\n\n")
info_ls = []
for profile, link in profiles:
info_ls.append( (profile.split('\n'), link) )
for iteam, link in info_ls:
if 'Learn more' in iteam:
info_ls.remove(iteam)
print(info_ls)
info_ls = [(iteam, link) for iteam, link in info_ls if iteam != ['']]
for info, link in info_ls:
if info[0] == info[1]:
info.remove(info[1])
try:
name = info[0]
degree_connection = info[2]
zawod = info[3]
region = info[4]
opis = info[5]
opis_f = opis.replace(","," ")
list_of_user_data = [name, zawod, opis_f]
for data in list_of_user_data:
try:
comp = re.findall('at ([a-zA-Z0-9]+)',data)
firma = comp[0]
break
except:
continue
if comp == []:
firma = "brak_danych"
self.people_ls_dic.append({"name":name,"degree_connection":degree_connection,"zawod":zawod,"region":region,"opis":opis,"firma":firma,"link":link})
except:
pass
def go_home(self):
home = self.driver.find_element_by_xpath('//*[#id="inbug-nav-item"]/a')
home.click()
def next_page(self):
sleep(3)
next_p = self.driver.find_element_by_xpath('//*[#aria-label="Next"]')
next_p.click()
def open_people(self):
self.driver.get("https://www.linkedin.com/search/results/people/?origin=DISCOVER_FROM_SEARCH_HOME")
sleep(2)
search_bar = self.driver.find_element_by_xpath('//*[#class="search-global-typeahead__input always-show-placeholder"]')
search_bar.send_keys(parameters.search_query)
search_bar.send_keys(Keys.ENTER)
sleep(3)
def filter_company(self):
cl = self.driver.find_element_by_xpath('//*[#aria-label="Current company filter. Clicking this button displays all Current company filter options."]')
cl.click()
for comp in parameters.list_of_comp:
text = self.driver.find_element_by_xpath('//*[#placeholder="Add a company"]')
text.send_keys(comp)
sleep(1)
filt = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]/nav/div/div[1]/div/div[2]/ul/li[5]/div/div/div/div[1]/div/form/fieldset/div[1]/div/div/div[2]/div/div[2]')
sleep(0.2)
filt.click()
sleep(1)
apply = self.driver.find_element_by_xpath('/html/body/div[7]/div[3]/div/div[1]/nav/div/div[1]/div/div[2]/ul/li[5]/div/div/div/div[1]/div/form/fieldset/div[2]/button[2]')
apply.click()
sleep(1)
def close(self):
self.driver.close()
def write_to_csv(self):
csv_file = "neville.csv"
with open(csv_file, 'w', encoding="utf-8", newline='') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames = self.csv_name_colums)
writer.writeheader()
for data in self.people_ls_dic:
writer.writerow(data)
scrypt = LinkedIn()
scrypt.login()
scrypt.open_people()
ls = range(parameters.ilosc_stron)
scrypt.filter_company()
for i in sorted(ls,reverse=True):
scrypt.neville_try()
if i == 1:
break
scrypt.next_page()
scrypt.write_to_csv()
scrypt.close()
Ofc I have file with parameters and i looks' like this:
linkedin_username = ""
linkedin_password = ""
search_query = 'vcloud director'
list_of_comp = ['Microsoft']
ilosc_stron = 2 //number of pages to click on

Instaloader get_followers issue

So I wrote this code to get the list of followers on Instagram using instaloader library in python :
login_name = 'beyondhelloworld'
target_profile = 'femindharamshi'
# OR
#import sys
#target_profile = sys.argv[1] # pass in target profile as argument
from instaloader import Instaloader, Profile
loader = Instaloader()
# login
try:
loader.load_session_from_file(login_name)
except FileNotFoundError:
loader.context.log("Session file does not exist yet - Logging in.")
if not loader.context.is_logged_in:
loader.interactive_login(login_name)
loader.save_session_to_file()
profile = Profile.from_username(loader.context, target_profile)
followers = profile.get_followers()
loader.context.log()
loader.context.log('Profile {} has {} followers:'.format(profile.username, profile.followers))
loader.context.log()
for follower in followers:
loader.context.log(follower.username, flush=True)
But I keep getting this error :
Loaded session from /Users/femindharamshi/.config/instaloader/session-beyondhelloworld.
Traceback (most recent call last):
File "/Users/femindharamshi/Documents/instaload/env/lib/python3.7/site-packages/instaloader/structures.py", line 597, in _obtain_metadata
self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
KeyError: 'graphql'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "il.py", line 20, in <module>
profile = Profile.from_username(loader.context, target_profile)
File "/Users/femindharamshi/Documents/instaload/env/lib/python3.7/site-packages/instaloader/structures.py", line 552, in from_username
profile._obtain_metadata() # to raise ProfileNotExistException now in case username is invalid
File "/Users/femindharamshi/Documents/instaload/env/lib/python3.7/site-packages/instaloader/structures.py", line 606, in _obtain_metadata
', '.join(similar_profiles[0:5]))) from err
instaloader.exceptions.ProfileNotExistsException: Profile femindharamshi does not exist.
The most similar profile is: femindharamshi.
How do I solve this issue?
The output says that profile "femindharamshi" does not exist but that is what my profile is. It also says :
The most similar profile is: femindharamshi.

import instaloader
import random
import os
dir_path_driver = os.getcwd()
def username_password():
listusername = []
with open("./username.txt","r") as usernames:
for username in usernames:
listusername.append((username.rstrip("\n")).split(":"))
if len(listusername) == 1:
select = 0
else:
select = random.randint(0,len(listusername))
return listusername[select][0],listusername[select][1]
def get_followers():
L = instaloader.Instaloader()
# Login or load session
username,password =username_password()
listfile = os.listdir(dir_path_driver+"/cookie")
for i in listfile:
if i != f"{username}":
L.login(username, password)
L.save_session_to_file(filename=dir_path_driver+"/cookie/"+f"{username}")
else:
L.load_session_from_file(filename=dir_path_driver+"/cookie/"+f"{username}",username = username)
file = open("prada_followers.txt","a+")
profile = instaloader.Profile.from_username(L.context, "idinstagram")
for followee in profile.get_followers():
username = followee.username
file.write(username + "\n")
file.close()

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
class InstaBot:
"""InstaBot can login, can return unfollowers that don't
follow you back.
Object requires two args.
'Username' & 'Password' """
def __init__(self,username,pw):
self.username = username
self.pw = pw
self.driver = webdriver.Chrome(executable_path='chromedriver.exe')
self.base_url = "https://instagram.com"
self.driver.get("{}".format(self.base_url))
sleep(2)
self.driver.maximize_window()
self.login()
def login(self):
self.driver.find_element_by_xpath("//input[#name=\"username\"]")\
.send_keys(self.username)
self.driver.find_element_by_xpath("//input[#name=\"password\"]")\
.send_keys(self.pw)
self.driver.find_element_by_xpath("//button[#type=\"submit\"]")\
.click()
sleep(10)
self.driver.find_element_by_xpath("//button[contains(text(), 'Not Now')]")\
.click()
sleep(2)
def get_unfollowers(self):
self.driver.find_element_by_xpath("//a[contains(#href, '/{}')]".format(self.username))\
.click()
sleep(3)
self.driver.find_element_by_xpath("//a[contains(#href, '/following')]")\
.click()
sleep(2)
following = self._get_names()
self.driver.find_element_by_xpath("//a[contains(#href, '/followers')]")\
.click()
sleep(2)
followers = self._get_names()
not_following_back = [user for user in following if user not in followers]
return not_following_back
## suggetions = self.driver.find_element_by_xpath('//h4[contains(text(), Suggetions)]')
## self.driver.execute_script('arguments[0].scrollIntoView()',suggetions)
def _get_names(self):
scroll_box = self.driver.find_element_by_xpath("/html/body/div[4]/div/div[2]")
last_ht , ht = 0,1
while last_ht != ht:
last_ht = ht
sleep(1)
ht = self.driver.execute_script("""
arguments[0].scrollTo(0,arguments[0].scrollHeight);
return arguments[0].scrollHeight;
""", scroll_box)
links = scroll_box.find_elements_by_tag_name('a')
names = [name.text for name in links if name.text != '']
sleep(2)
self.driver.find_element_by_xpath("/html/body/div[4]/div/div[1]/div/div[2]/button")\
.click()
return names
def navigate_to_user(self,user):
self.driver.get("{}/{}".format(self.base_url,user))
def scroll_down(self):
last_height = self.driver.execute_script("return document.body.scrollHeight")
while True:
self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
sleep(2)
new_height = self.driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
my_bot = InstaBot(Username,Password)
##unfollowers = my_bot.get_unfollowers() #will return a list
my_bot.navigate_to_user(Any User Name that you follow) #Will return your friend's followers list

Save screenshot on test failure in python with 'splinter'

I try save screenshot on test failure in python with 'splinter'
1) This code works for Selenium:
# #pytest.fixture(scope="function")
# def browser(request):
# options = Options()
# options.add_argument("--headless")
# options.add_argument("--start-maximized")
# # browser = webdriver.Chrome(ChromeDriverManager().install())
# browser = webdriver.Chrome(options=options)
# browser.implicitly_wait(5)
# failed_before = request.session.testsfailed
# yield browser
# if request.session.testsfailed != failed_before:
# test_name = request.node.name
# take_screenshot(browser, test_name)
# browser.quit()
#
# def take_screenshot(browser, test_name):
# screenshots_dir = "C:\\Users\Ark\\PycharmProjects\\Gop\\Reports"
# screenshot_file_path = "{}/{}.png".format(screenshots_dir, test_name)
# browser.save_screenshot(
# screenshot_file_path)
But doesn't works with Splinter (browser don't close and don't make screenshot):
#pytest.fixture(scope="function")
def browser(request):
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
browser = Browser("chrome", headless=False, incognito=True, options=options)
failed_before = request.session.testsfailed
yield browser
if request.session.testsfailed != failed_before:
test_name = request.node.name
take_screenshot(browser, test_name)
browser.quit()
def take_screenshot(browser, test_name):
screenshots_dir = "C:\\Users\Ark\\PycharmProjects\\Gop\\Reports"
screenshot_file_path = "{}/{}.png".format(screenshots_dir, test_name)
browser.save_screenshot(
screenshot_file_path)
print("\n!!! SCREENSHOT OF FAILURE '" + test_name + "' SAVED INTO: '" + screenshots_dir + "' WITH NAME '" + test_name + "'")
2) Or how do this function working? (pytest-splinter)
splinter_make_screenshot_on_failure
https://github.com/pytest-dev/pytest-splinter
Can you help?

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

web scraping with python, with navigation controller - python

Related

Using Selenium to get a list of Instagram followers

Whenever I look for an element in a page, even if it exists, it is still coming up as nonexistent

Python Selenium Script:

Instaloader get_followers issue

Save screenshot on test failure in python with 'splinter'

Categories

Resources