Extracting user data from stackoverflow using Selenium - python

I'm pretty new to this web scraping (Data extraction) stuff. I want to extract the user's reputation from his stackoverflow account. I'm using Selenium. I've successfully logged in but I can't get the data from the next url, which is http://stackoverflow.com
This is my code:
from selenium import webdriver
from selenium.webdriver.support import ui
def page_is_loaded(driver):
    """Return True once the page's ``<body>`` element can be located.

    Used as the predicate passed to ``WebDriverWait.until``.

    Args:
        driver: Object exposing ``find_element_by_tag_name`` (a WebDriver).

    Returns:
        bool: True if the lookup returned an element, False if it returned None.
    """
    # Identity comparison is the idiomatic way to test for None.
    return driver.find_element_by_tag_name("body") is not None
chromedriver = 'C:\\chromedriver.exe'
browser = webdriver.Chrome(chromedriver)
browser.get('https://stackoverflow.com/users/login')
username = browser.find_element_by_id("email")
password = browser.find_element_by_id("password")
username.send_keys("emailID")
password.send_keys("password")
browser.find_element_by_name("submit-button").click()
wait = ui.WebDriverWait(browser, 10)
wait.until(page_is_loaded)
print browser.current_url
It works, I get redirected to the next page, but the last command still prints:
https://stackoverflow.com/users/login
Thanks in advance. I'm sure I'm missing something little.

It takes some time to update the browser.current_url after redirecting. You can use either browser.refresh() or time.sleep() to get the updated value.
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://stackoverflow.com/users/login')
username = browser.find_element_by_id("email")
password = browser.find_element_by_id("password")
username.send_keys("emailID")
password.send_keys("password")
browser.find_element_by_name("submit-button").click()
browser.refresh()
print browser.current_url
Hopefully the output of the code below helps you understand this better.
import time
from selenium import webdriver

# Demo: log in, then print current_url once per second to show how it
# changes (or not) in the seconds following the click on the submit button.
browser = webdriver.Chrome()
browser.get('https://stackoverflow.com/users/login')
username = browser.find_element_by_id("email")
password = browser.find_element_by_id("password")
username.send_keys("emailID")
password.send_keys("password")
browser.find_element_by_name("submit-button").click()
for i in range(5):
    # A single formatted string prints the same text under Python 2 and 3.
    print("%s  - loop  %s" % (browser.current_url, i))
    time.sleep(1)
print(browser.current_url)

Related

"login" is not defined by Pylance (web scraping)

login_xpath has no error but in my last line of code login has an error.
next was written the same way so I don't understand why next doesn't have an error but login does.
Code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
import time
def account_info():
    """Read the login credentials from ``account_info.txt``.

    The file is split on whitespace; the first token is used as the email
    and the second as the password.

    Returns:
        tuple: ``(email, password)`` as strings.

    Raises:
        IndexError: If the file holds fewer than two whitespace-separated tokens.
    """
    with open('account_info.txt', 'r') as f:
        info = f.read().split()
    email = info[0]
    # Take the second token of the file; the bare literal ``[1]`` would hand
    # a one-element list to send_keys instead of the password string.
    password = info[1]
    return email, password
email, password = account_info()
options = Options()
options.add_argument("start.maximized")
driver = webdriver.Chrome(options=options)
driver.get("https://twitter.com/i/flow/login")
email_xpath = '//*[#id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[1]/div/div[2]/label/div/div[2]/div/input'
next_xpath = '//*[#id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[2]/div/div'
password_xpath = '//*[#id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[1]/div/div[2]/div/label/div/div[2]/div/input'
login_xpath = '//*[#id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[2]/div/div'
time.sleep(1)
driver.find_element_by_xpath(email_xpath).send_keys(email)
time.sleep(0.5)
driver.find_element_by_xpath(next_xpath).click(next)
time.sleep(0.5)
driver.find_element_by_xpath(password_xpath).send_keys(password)
time.sleep(0.5)
driver.find_element_by_xpath(login_xpath).click(login)
In selenium .click() doesn't require you to pass anything. That's probably the issue. Maybe it worked in the first instance as the actual text in the Twitter login flow is "Next" for the next button, but is "Log in" with a space in between for the login button. Just try this change:
driver.find_element_by_xpath(email_xpath).send_keys(email)
time.sleep(0.5)
driver.find_element_by_xpath(next_xpath).click(next)
time.sleep(0.5)
driver.find_element_by_xpath(password_xpath).send_keys(password)
time.sleep(0.5)
driver.find_element_by_xpath(login_xpath).click(login)
To
driver.find_element_by_xpath(email_xpath).send_keys(email)
time.sleep(0.5)
driver.find_element_by_xpath(next_xpath).click()
time.sleep(0.5)
driver.find_element_by_xpath(password_xpath).send_keys(password)
time.sleep(0.5)
driver.find_element_by_xpath(login_xpath).click()
Let me know if that solves the issue.

How to click on the first result on a dynamic page using python selenium?

I am trying to click on the first result on this page, but all the options I tried didn't work.
Firstly I just login into the website with email: kocianlukyluk#gmail.com and password: Redfinpython06. Here is the code for it:
driver = webdriver.Chrome("C:\\Users\\kocia\\OneDrive\\Plocha\\Python\\nastaveni\\chromedriver.exe")
driver.get('https://www.redfin.com/myredfin/favorites')
email = 'kocianlukyluk#gmail.com'
password = 'Redfinpython06'
time.sleep(3)
driver.find_element_by_xpath(
'//*[#id="content"]/div[6]/div/div[2]/div/div/form/span[1]/span/div/input').send_keys(email)
time.sleep(3)
driver.find_element_by_xpath(
'//*[#id="content"]/div[6]/div/div[2]/div/div/form/span[2]/span/div/input').send_keys(password)
time.sleep(3)
sing_up = driver.find_element_by_css_selector('button[type=submit]')
sing_up.click()
But the problem is after login i can't click on the first result on the page.
Here is what i tried:
result = driver.find_elements_by_xpath("//*[#id="content"]/div[10]/div/div[5]/div/div[2]/div/div")[0]
result.find_element_by_xpath("//*[#id="content"]/div[10]/div/div[5]/div/div[2]/div/div/div[1]").click()
or
result = driver.find_elements_by_xpath("//*[#id="content"]/div[10]/div/div[5]/div/div[2]/div/div")[0]
result.click()
or
result = driver.find_element_by_xpath("//*[#id="content"]/div[10]/div/div[5]/div/div[2]/div/div/div[1]")
result.click()
Thank you so much for help.
I hope that is a dummy email and password that you are just using for testing purposes :)
The code below clicks on the first house picture in the list. I also cleaned up your email and password locators; you can see how much easier it is to grab the fields by name.
Also, you may want to put proper wait methods around these find-element calls. Using sleep is generally not recommended.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
driver = webdriver.Chrome()
driver.get('https://www.redfin.com/myredfin/favorites')
email = 'kocianlukyluk#gmail.com'
password = 'Redfinpython06'
sleep(3)
driver.find_element_by_name(
'emailInput').send_keys(email)
sleep(3)
driver.find_element_by_name(
'passwordInput').send_keys(password)
sleep(3)
sing_up = driver.find_element_by_css_selector('button[type=submit]')
sing_up.click()
sleep(3)
# "@class" is the XPath attribute test; "[1]" picks the first FavoritesHome card.
first_house = driver.find_element_by_xpath("//div[@class='FavoritesHome'][1]//img")
first_house.click()

Message: Unable to locate element? [duplicate]

I want to log in to instagram using selenium, but I can't seem to enter values into the fields.
Here's my script:
#go to this address
browser.get('https://www.instagram.com')
#sleep for 1 seconds
sleep(1)
#find the 'login' button on homepage
login_elem = browser.find_element_by_xpath(
'//*[#id="react-root"]/section/main/article/div[2]/div[2]/p/a')
#navigate to login page
login_elem.click()
Having trouble from here onwards:
#locate the username field within the form
unform = browser.find_element_by_xpath(
'//*[#id="f3b8e6724a27994"]')
#clear the field
textunform.clear()
#enter 'test' into field
unform.send_keys('test')
There is a trick here: instead of searching for the "Log In" button, there is a better way to log in without it. How? Let's see:
Import the packages you need:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep

# Select the driver; in this example we use Chrome.
chromedriver_path = 'chromedriver.exe'  # Change this to your own chromedriver path!
# Keep the browser in its own name: rebinding "webdriver" would shadow the
# imported selenium module for the rest of the script.
driver = webdriver.Chrome(executable_path=chromedriver_path)
sleep(2)
driver.get('https://www.instagram.com/accounts/login/?source=auth_switcher')
sleep(3)
username = driver.find_element_by_name('username')
username.send_keys('yourUsername')
password = driver.find_element_by_name('password')
password.send_keys('yourPassword')
# Instead of searching for the "Log In" button, submit the enclosing form directly.
submit = driver.find_element_by_tag_name('form')
submit.submit()
You can copy the code and run it directly (even without a real username or password)
To get the webdriver (chromedriver.exe) from ChromeDriver
Instagram generates dynamic ids, XPaths and CSS classes: every time the page reloads, these attributes change their values, which makes it harder to click elements or set values:
I solved it:
from selenium.webdriver.common.action_chains import ActionChains

# Locate the username field
unform = browser.find_element_by_name("username")
# Locate the password field
pwform = browser.find_element_by_name('password')
# Click into each field in turn and type into it; nothing runs until perform().
ActionChains(browser)\
    .move_to_element(unform).click()\
    .send_keys('test')\
    .move_to_element(pwform).click()\
    .send_keys('test')\
    .perform()
# Locate login button ("@id" is the XPath attribute test)
login_button = browser.find_element_by_xpath(
    '//*[@id="react-root"]/section/main/article/div[2]/div[1]/div/form/span/button')
# Click login button
login_button.click()
The username field on Instagram is rendered by ReactJS, so you have to induce WebDriverWait for the element to become clickable and then invoke the send_keys() method, as follows:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
# Pass the Options object through "options="; "chrome_options=" is the deprecated spelling.
browser = webdriver.Chrome(options=options, executable_path=r'C:\path\to\chromedriver.exe')
browser.get('https://www.instagram.com')
# "@id" is the XPath attribute test.
login_elem = browser.find_element_by_xpath('//*[@id="react-root"]/section/main/article/div[2]/div[2]/p/a')
login_elem.click()
# Wait up to 20 seconds for the username input to become clickable before typing.
WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']"))).send_keys("anon")
Browser Screenshot :
In this case IMHO it's better to use this: browser.find_element_by_name("Bermil18") / browser.find_element_by_name("1q56y3w5t9k0p3es8i1q")
Here's my solution for Sign In on Instagram
def login(self, username, password):
    """Log in to Instagram with the given credentials.

    Opens the login page, fills in and submits the form, then prints whether
    the login appears to have succeeded.

    Args:
        username: Text typed into the input named "username".
        password: Text typed into the input named "password".
    """
    self.driver.get("https://www.instagram.com/accounts/login/")
    try:
        self.driver.find_element_by_xpath("//input[@name=\"username\"]").send_keys(username)  # filling username
        self.driver.find_element_by_xpath("//input[@name=\"password\"]").send_keys(password)  # filling password
        self.driver.find_element_by_xpath("//button[@type=\"submit\"]").click()  # submit form
    except NoSuchElementException:
        print("Failed to log in: Unable to locate Username/Password/LogIn element(s)")
        # The form was never submitted, so do not fall through and report success.
        return
    # If login is unsuccessful, Instagram will show a message "Sorry, your password was incorrect. Please double-check your password."
    error_alerts = self.driver.find_elements_by_xpath("//p[@id = \"slfErrorAlert\"]")
    if not error_alerts:
        print("Login successful!")
    else:
        print("Sorry, sign in unsuccessful. Please double-check your credentials.")
See my Github repo for more: https://github.com/mlej8/InstagramBot
def login(username, password):
    """Log in to Instagram and click through the dialogs that follow.

    Uses the module-level ``driver`` and ``base_url``.

    Args:
        username: Text typed into the first ``_2hvTZ`` element.
        password: Text typed into the second ``_2hvTZ`` element.
    """
    driver.get(base_url)
    time.sleep(3)
    # NOTE(review): presumably the two '_2hvTZ' elements are the username and
    # password inputs, in that order - confirm against the live page.
    fields = driver.find_elements_by_class_name('_2hvTZ')
    fields[0].clear()
    fields[1].clear()
    fields[0].send_keys(username)
    fields[1].send_keys(password)
    driver.find_element_by_class_name('L3NKy').click()
    time.sleep(3)
    # Click only the first button labelled "Not Now", if there is one.
    for button in driver.find_elements_by_tag_name('button'):
        if button.text == 'Not Now':
            button.click()
            break
    time.sleep(3)
    driver.find_element_by_class_name('HoLwm').click()
base_url is the Instagram URL.
I have made an Instagram bot; you can find the code for logging in, following, unfollowing, liking, checking posts from recent days, etc. at the following GitHub link.
https://github.com/Devanshchowdhury2212/Instagram-Web-scraping-
This worked for me:
def login(self, username):
    """Open Instagram in a new Chrome window and sign in as *username*.

    The password is not a parameter: it is read from the name ``pw``, which
    must already be in scope when this method runs.

    Args:
        username: Text typed into the input named "username".
    """
    self.driver = webdriver.Chrome()
    self.driver.get('https://www.instagram.com/')
    sleep(1)
    username_input = self.driver.find_element_by_xpath(
        "//input[@name='username']")
    username_input.send_keys(username)
    password_input = self.driver.find_element_by_xpath(
        "//input[@name='password']")
    password_input.send_keys(pw)
    submit_btn = self.driver.find_element_by_xpath(
        "//button[@type='submit']")
    submit_btn.click()
    sleep(2)
    # Going by the variable name, this button declines the "save your login info" prompt.
    save_your_login_info_not_now = self.driver.find_element_by_xpath("/html/body/div[1]/section/main/div/div/div/div/button")
    save_your_login_info_not_now.click()
You will notice that I am sending the variable pw instead of my actual password. This is for security reasons. Create a new file called secrets.py, import pw from it in your script (from secrets import pw), and inside secrets.py declare your password in the following format:
pw = '*********'
Try to select the field with
unform = browser.find_element_by_xpath("//input[#name='username']")
unform.send_keys(<username>)
and for password
browser.find_element_by_xpath("//input[#name='password']")

Find elements by input type

I'm trying out Python and Selenium. My goal is to log in to Discord (https://discordapp.com/login), but here is the problem: I can't manage to select the email and password boxes. The worst part is trying to select a text box on a server... I tried everything, even locating by XPath, but I can't seem to get it right. Also, doing this in Atom is probably not the best idea, since I don't get any error messages :P. Here is a snippet that tries to select the email text box.
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
browser = webdriver.Firefox()
browser.get('https://discordapp.com/login')
assert 'discordapp' in browser.title
elem = browser.find_element_by_name('textarea')# this is the part where i need help
elem.send_keys('test' + Keys.ENTER)
For email this css selector should work :
input[type='email']
For password :
input[type='password']
I've tested this code :
browser.get("https://discordapp.com/login")
# Select the fields by their input type.
elem = browser.find_element_by_css_selector("input[type='email']")
elem.send_keys("itsolidude@imail.com")
elem1 = browser.find_element_by_css_selector("input[type='password']")
elem1.send_keys("password")
# The visible "Login" text sits in a div; its parent button is what gets clicked.
login_button = browser.find_element_by_xpath("//div[text()='Login']/parent::button")
login_button.click()
This worked fine on my machine.
You need to check the enclosing div containers and include them in the XPath.
Try the following code, and please fix the indentation in case Stack Overflow does not carry it over properly (I don't know how to format it nicely and correctly).
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
class loginPage():
    """Drives the Discord login form in Firefox with placeholder credentials."""

    def test(self):
        """Open the login page and type into the email and password inputs.

        The browser is closed when the method finishes, including when an
        element lookup raises.
        """
        baseUrl = 'https://discordapp.com/login'
        driver = webdriver.Firefox(executable_path="G:\\webdriver/geckodriver.exe")
        try:
            driver.maximize_window()
            driver.implicitly_wait(5)
            driver.get(baseUrl)
            # "@type" is the XPath attribute test.
            mail = driver.find_element(By.XPATH, "//div[3]/div[1]/div/input[contains(@type,'email')]")
            time.sleep(5)
            mail.send_keys("test@gmail.com")
            time.sleep(3)
            print("Enter mail adress")
            password = driver.find_element(By.XPATH, "//div[3]/div[2]/div/input[contains(@type,'password')]")
            time.sleep(5)
            password.send_keys("123456789")
            time.sleep(3)
            print("Enter password")
            time.sleep(10)
        finally:
            # Always release the browser so a failed lookup does not leave Firefox running.
            driver.quit()
ff = loginPage()
ff.test()
Login To Discord Website using Python and Selenium:
import time

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

driver = webdriver.Chrome()
driver.get("https://discord.com/login")
# Fixed pause before looking up the form fields (needs the "time" import above).
time.sleep(6)
username_input = driver.find_element_by_name('email')
username_input.send_keys("enter-your-username-here")
password_input = driver.find_element_by_name('password')
password_input.send_keys("Enter-your-password-here")
# "@id" is the XPath attribute test.
login_button = driver.find_element_by_xpath('//*[@id="app-mount"]/div[2]/div/div[2]/div/div/form/div/div/div[1]/div[3]/button[2]')
login_button.click()

Filling in login forms in Instagram using selenium and webdriver (chrome) python OSX

I want to log in to instagram using selenium, but I can't seem to enter values into the fields.
Here's my script:
#go to this address
browser.get('https://www.instagram.com')
#sleep for 1 seconds
sleep(1)
#find the 'login' button on homepage
login_elem = browser.find_element_by_xpath(
'//*[#id="react-root"]/section/main/article/div[2]/div[2]/p/a')
#navigate to login page
login_elem.click()
Having trouble from here onwards:
#locate the username field within the form
unform = browser.find_element_by_xpath(
'//*[#id="f3b8e6724a27994"]')
#clear the field
textunform.clear()
#enter 'test' into field
unform.send_keys('test')
There is a trick here: instead of searching for the "Log In" button, there is a better way to log in without it. How? Let's see:
Import the packages you need:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep

# Select the driver; in this example we use Chrome.
chromedriver_path = 'chromedriver.exe'  # Change this to your own chromedriver path!
# Keep the browser in its own name: rebinding "webdriver" would shadow the
# imported selenium module for the rest of the script.
driver = webdriver.Chrome(executable_path=chromedriver_path)
sleep(2)
driver.get('https://www.instagram.com/accounts/login/?source=auth_switcher')
sleep(3)
username = driver.find_element_by_name('username')
username.send_keys('yourUsername')
password = driver.find_element_by_name('password')
password.send_keys('yourPassword')
# Instead of searching for the "Log In" button, submit the enclosing form directly.
submit = driver.find_element_by_tag_name('form')
submit.submit()
You can copy the code and run it directly (even without a real username or password)
To get the webdriver (chromedriver.exe) from ChromeDriver
Instagram generates dynamic ids, XPaths and CSS classes: every time the page reloads, these attributes change their values, which makes it harder to click elements or set values:
I solved it:
from selenium.webdriver.common.action_chains import ActionChains

# Locate the username field
unform = browser.find_element_by_name("username")
# Locate the password field
pwform = browser.find_element_by_name('password')
# Click into each field in turn and type into it; nothing runs until perform().
ActionChains(browser)\
    .move_to_element(unform).click()\
    .send_keys('test')\
    .move_to_element(pwform).click()\
    .send_keys('test')\
    .perform()
# Locate login button ("@id" is the XPath attribute test)
login_button = browser.find_element_by_xpath(
    '//*[@id="react-root"]/section/main/article/div[2]/div[1]/div/form/span/button')
# Click login button
login_button.click()
The username field on Instagram is rendered by ReactJS, so you have to induce WebDriverWait for the element to become clickable and then invoke the send_keys() method, as follows:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

options = Options()
options.add_argument("start-maximized")
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")
# Pass the Options object through "options="; "chrome_options=" is the deprecated spelling.
browser = webdriver.Chrome(options=options, executable_path=r'C:\path\to\chromedriver.exe')
browser.get('https://www.instagram.com')
# "@id" is the XPath attribute test.
login_elem = browser.find_element_by_xpath('//*[@id="react-root"]/section/main/article/div[2]/div[2]/p/a')
login_elem.click()
# Wait up to 20 seconds for the username input to become clickable before typing.
WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']"))).send_keys("anon")
Browser Screenshot :
In this case IMHO it's better to use this: browser.find_element_by_name("Bermil18") / browser.find_element_by_name("1q56y3w5t9k0p3es8i1q")
Here's my solution for Sign In on Instagram
def login(self, username, password):
    """Log in to Instagram with the given credentials.

    Opens the login page, fills in and submits the form, then prints whether
    the login appears to have succeeded.

    Args:
        username: Text typed into the input named "username".
        password: Text typed into the input named "password".
    """
    self.driver.get("https://www.instagram.com/accounts/login/")
    try:
        self.driver.find_element_by_xpath("//input[@name=\"username\"]").send_keys(username)  # filling username
        self.driver.find_element_by_xpath("//input[@name=\"password\"]").send_keys(password)  # filling password
        self.driver.find_element_by_xpath("//button[@type=\"submit\"]").click()  # submit form
    except NoSuchElementException:
        print("Failed to log in: Unable to locate Username/Password/LogIn element(s)")
        # The form was never submitted, so do not fall through and report success.
        return
    # If login is unsuccessful, Instagram will show a message "Sorry, your password was incorrect. Please double-check your password."
    error_alerts = self.driver.find_elements_by_xpath("//p[@id = \"slfErrorAlert\"]")
    if not error_alerts:
        print("Login successful!")
    else:
        print("Sorry, sign in unsuccessful. Please double-check your credentials.")
See my Github repo for more: https://github.com/mlej8/InstagramBot
def login(username, password):
    """Log in to Instagram and click through the dialogs that follow.

    Uses the module-level ``driver`` and ``base_url``.

    Args:
        username: Text typed into the first ``_2hvTZ`` element.
        password: Text typed into the second ``_2hvTZ`` element.
    """
    driver.get(base_url)
    time.sleep(3)
    # NOTE(review): presumably the two '_2hvTZ' elements are the username and
    # password inputs, in that order - confirm against the live page.
    fields = driver.find_elements_by_class_name('_2hvTZ')
    fields[0].clear()
    fields[1].clear()
    fields[0].send_keys(username)
    fields[1].send_keys(password)
    driver.find_element_by_class_name('L3NKy').click()
    time.sleep(3)
    # Click only the first button labelled "Not Now", if there is one.
    for button in driver.find_elements_by_tag_name('button'):
        if button.text == 'Not Now':
            button.click()
            break
    time.sleep(3)
    driver.find_element_by_class_name('HoLwm').click()
base_url is the Instagram URL.
I have made an Instagram bot; you can find the code for logging in, following, unfollowing, liking, checking posts from recent days, etc. at the following GitHub link.
https://github.com/Devanshchowdhury2212/Instagram-Web-scraping-
This worked for me:
def login(self, username):
    """Open Instagram in a new Chrome window and sign in as *username*.

    The password is not a parameter: it is read from the name ``pw``, which
    must already be in scope when this method runs.

    Args:
        username: Text typed into the input named "username".
    """
    self.driver = webdriver.Chrome()
    self.driver.get('https://www.instagram.com/')
    sleep(1)
    username_input = self.driver.find_element_by_xpath(
        "//input[@name='username']")
    username_input.send_keys(username)
    password_input = self.driver.find_element_by_xpath(
        "//input[@name='password']")
    password_input.send_keys(pw)
    submit_btn = self.driver.find_element_by_xpath(
        "//button[@type='submit']")
    submit_btn.click()
    sleep(2)
    # Going by the variable name, this button declines the "save your login info" prompt.
    save_your_login_info_not_now = self.driver.find_element_by_xpath("/html/body/div[1]/section/main/div/div/div/div/button")
    save_your_login_info_not_now.click()
You will notice that I am sending the variable pw instead of my actual password. This is for security reasons. Create a new file called secrets.py, import pw from it in your script (from secrets import pw), and inside secrets.py declare your password in the following format:
pw = '*********'
Try to select the field with
unform = browser.find_element_by_xpath("//input[#name='username']")
unform.send_keys(<username>)
and for password
browser.find_element_by_xpath("//input[#name='password']")

Categories