I'm new to Python and want to log in to my account.
My code is:
# Log in to the site with Selenium.
# NOTE(review): the original mixed `requests` and Selenium — requests.Session()
# downloaded the page in a separate HTTP client, but the Selenium-controlled
# browser itself was never told to navigate anywhere, so every find_element*
# call ran against a blank page and failed with "Unable to find element .auth".
# The fix is to load the page in the browser with driver.get().
driver = webdriver.Firefox()
url = "http://tterminal.info/"
driver.get(url)  # load the page in the browser Selenium controls
# <div class="auth">
authForm = driver.find_element_by_class_name("auth")
# <div class="login">
loginForm = authForm.find_element_by_class_name("login")
# Enter login
login = loginForm.find_element_by_name("login")
login.clear()
login.send_keys("mylogin")
# Enter pass
pswd = loginForm.find_element_by_name("pass")
pswd.send_keys("mypassword")
# Click login
loginForm.find_element_by_class_name("submit").click()
But it gives me "Unable to find element .auth".
Where is my mistake?
While working with Selenium you don't need the requests.Session() module, and your code block can be optimized as follows:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service

# NOTE(review): the find_element_by_* helpers were removed in Selenium 4.3 and
# the `executable_path` keyword in Selenium 4.10; the modern API is
# find_element(By.<strategy>, ...) plus a Service object for the driver path.
driver = webdriver.Firefox(service=Service(r'C:\path\to\geckodriver.exe'))
url = "http://tterminal.info"
driver.get(url)

# Fill in the username field of the login form.
login = driver.find_element(By.CSS_SELECTOR, "form[name='login'] input.text[name='login']")
login.clear()
login.send_keys("mylogin")

# Fill in the password field.
pswd = driver.find_element(By.CSS_SELECTOR, "form[name='login'] input.text[name='pass']")
pswd.send_keys("mypassword")

# Submit the form.
driver.find_element(By.CSS_SELECTOR, "form[name='login'] input.submit").click()
Related
I am trying to scrape data from this URL with Python and Selenium:
» https://shopee.co.id/PANCI-PRESTO-24cm-3.5L-TEFLON-i.323047288.19137193916?sp_atk=7e8e7abc-834c-4f4a-9234-19da9ddb2445&xptdk=7e8e7abc-834c-4f4a-9234-19da9ddb2445
If you watch the network stream you will see that it returns an api on the back end like this https://shopee.co.id/api/v4/item/get?itemid=19137193916&shopid=323047288. How can I get the response returned by this api with selenium?
Solved!
import json
import time

from selenium import webdriver

# Set up the Chrome webdriver with performance logging enabled so the
# DevTools network events show up in driver.get_log("performance").
# NOTE(review): the `desired_capabilities` keyword was removed in
# Selenium 4.10; capabilities are set on the Options object instead.
options = webdriver.ChromeOptions()
options.binary_location = "/usr/bin/brave"
options.add_argument("--ignore-certificate-errors")
options.set_capability("goog:loggingPrefs", {"performance": "ALL"})
driver = webdriver.Chrome(options=options)

# Navigate to the product page and give its XHR calls time to fire.
url = (
    "https://shopee.co.id/PANCI-PRESTO-24cm-3.5L-TEFLON-i.323047288.19137193916"
    "?sp_atk=7e8e7abc-834c-4f4a-9234-19da9ddb2445"
    "&xptdk=7e8e7abc-834c-4f4a-9234-19da9ddb2445"
)
driver.get(url)
time.sleep(3)  # crude wait; an explicit WebDriverWait would be more robust

# Scan the performance log for the target API request and dump its body.
api_url = "https://shopee.co.id/api/v4/item/get?itemid=19137193916&shopid=323047288"
logs = driver.get_log("performance")
for entry in logs:
    # Each log entry's "message" field is a JSON *string* describing one
    # DevTools event (the original passed a dict default to json.loads).
    message_dict = json.loads(entry.get("message", "{}")).get("message", {})
    if message_dict.get("method") != "Network.requestWillBeSent":
        continue
    params = message_dict.get("params", {})
    request_url = params.get("request", {}).get("url", "")
    if api_url in request_url:
        # Ask DevTools for the body of that request by its id.
        # NOTE(review): getResponseBody can fail if the response has not
        # finished loading when this runs — TODO confirm timing is adequate.
        response = driver.execute_cdp_cmd(
            "Network.getResponseBody", {"requestId": params.get("requestId")}
        )
        with open("response.json", "w") as f:
            f.write(response.get("body", ""))
I use selenium wire for this. You can do pip install selenium-wire to get it and then import it into your project and use it like so:
from seleniumwire import webdriver

# Disable response encoding so bodies come back as plain bytes instead of
# gzip/brotli-compressed data.
sw_options = {
    'disable_encoding': True
}

# Create the driver with the selected selenium-wire options.
# NOTE(review): the original assigned `driver.request_interceptor =
# interceptor` with `interceptor` never defined — a NameError. selenium-wire
# records all traffic in driver.requests automatically; an interceptor is
# only needed to *modify* outgoing requests, so the line is dropped.
driver = webdriver.Chrome(seleniumwire_options=sw_options)

# Navigate to page
driver.get('https://shopee.co.id/PANCI-PRESTO-24cm-3.5L-TEFLON-i.323047288.19137193916?sp_atk=7e8e7abc-834c-4f4a-9234-19da9ddb2445&xptdk=7e8e7abc-834c-4f4a-9234-19da9ddb2445')

# Iterate through the captured requests and find the one whose URL contains
# the API endpoint we need.
for a in driver.requests:
    if "/api/v4/item/get?itemid=19137193916&shopid=323047288" in a.url:
        body = a.response.body
        print(body)
We add disable_encoding to the options; otherwise the body would come back encoded and you'd have to decode it manually, which can be done like so:
# Manually decompress the body using the Content-Encoding response header
# ('identity' means no encoding). NOTE(review): `decode` is presumably
# seleniumwire.utils.decode — verify the import against the selenium-wire docs.
body = decode(response.body, response.headers.get('Content-Encoding', 'identity'))
Or done in the browser options as I did.
You can find more information here:
https://pypi.org/project/selenium-wire/#response-objects
I'm attempting to write a Python script that logs in to a website that runs JavaScript and scrapes an element from the dashboard page. I'm using mechanize to log in to the website and Requests-HTML to scrape the data.
I can successfully login to the accounts page using mechanize. But I cannot pass the cookie data to Requests-HTML and continue the session to the dashboard page so I can scrape the data. I can't seem to format the data right to get the website (through Requests-HTML) to accept it.
I did get a version of this script running entirely with Selenium (the code is at the bottom), but I'd prefer to run a script that doesn't require a browser driver that opens a window.
from requests_html import HTMLSession
import mechanize

username = "me@example.com"
password = "12345678"
accts_url = "https://accounts.website.com"
dash_url = "https://dashboard.website.com"

# --- Step 1: log in with mechanize -----------------------------------------
browser = mechanize.Browser()
browser.open(accts_url)
browser.select_form(nr=0)
browser.form['email'] = username
browser.form['password'] = password
browser.submit()
response = browser.open(dash_url)
print("mechanize, response:\n", response.read())
print("mechanize, browser.cookiejar:\n", browser.cookiejar)

# --- Step 2: hand the session cookies to Requests-HTML ---------------------
# NOTE(review): the original split str(browser.cookiejar) — the jar's repr —
# to fish out cookie values, then called browser.set_simple_cookie(), which
# returns None; session.post(..., cookies=None) therefore sent no cookies and
# the dashboard answered 403. mechanize's cookiejar is a standard
# http.cookiejar.CookieJar, and a requests session's RequestsCookieJar can
# absorb one wholesale with .update(), so no string parsing is needed.
session = HTMLSession()
session.cookies.update(browser.cookiejar)  # copy every mechanize cookie over
print("session.cookies:\n", session.cookies)

# --- Step 3: fetch the dashboard with the authenticated session ------------
# Loading a page is a GET (the original POSTed the dashboard URL).
response_obj = session.get(dash_url)
print("response_obj:\n", response_obj)
print("response_obj.cookies from session.get:\n", response_obj.cookies)
response_obj.html.render(sleep=0.5)  # execute the page's JavaScript
print("requests_html, r.html.find('input'):\n", response_obj.html.find('input'))
Terminal Output:
mechanize, response:
b'<!doctype html><html lang="en"><head><script>!function(e***shortened by OP***</html>' ### Output in this field tells me the login by mechanize was successful
mechanize, browser.cookiejar:
<CookieJar[<Cookie LBSERVERID=3**************8 for accounts.example.com/>, <Cookie session=.e***shortened by OP***Y for accounts.example.com/>, <Cookie LBSERVERID=0**************a for dashboard.example.com/>]>
cookiejar_token_str_list:
['<CookieJar[<Cookie', 'LBSERVERID=3************8', 'for', 'accounts.example.com/>,', '<Cookie', 'session=.e***shortened by OP***Y', 'for', 'accounts.example.com/>,', '<Cookie', 'LBSERVERID=0**************a', 'for', 'dashboard.example.com/>]>']
accounts 'LBSERVERID': 3************8 for accounts.example.com
accounts 'session': .e***shortened by OP***Y for accounts.example.com
dashboard 'LBSERVERID': 0**************a for dashboard.example.com
session.cookies:
<RequestsCookieJar[]>
dash_token: None
cookiejar_token: <CookieJar[<Cookie LBSERVERID=3************8 for accounts.example.com/>, <Cookie session=.e***shortened by OP***Y for accounts.example.com/>, <Cookie LBSERVERID=0**************a for dashboard.example.com/>]>
dash_cookie_dict:
{'name': 'LBSERVERID', 'value': '0**************a', 'domain': 'dashboard.example.com', 'path': '/'}
response_obj:
<Response [403]> ### Access denied and it issues a new cookie below
response_obj.cookies from session.post:
<RequestsCookieJar[<Cookie LBSERVERID=a**************3 for dashboard.example.com/>]>
requests_html, r.html.find('input'): ### The output below tells me I'm back on the login page
[<Element 'input' class=('form-control',) id='email' name='email' required='' type='text' value=''>, <Element 'input' class=('form-control',) id='password' name='password' required='' type='password' value=''>, <Element 'input' id='csrf_token' name='csrf_token' type='hidden' value='I***shortened by OP***Y'>]
My Selenium code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By

login_post_url = "https://accounts.example.com"
internal_url = "https://dashboard.example.com"
username = "user@email.com"
password = "12345678"

# Initialize the Safari driver (macOS). NOTE(review): the `executable_path`
# keyword was removed in Selenium 4.10, and /usr/bin/safaridriver is the
# default location Safari's driver is found at anyway.
driver = webdriver.Safari()
driver.get(login_post_url)  # head to login page

# Fill in the credentials and submit.
driver.find_element(By.ID, "email").send_keys(username)
driver.find_element(By.ID, "password").send_keys(password)
driver.find_element(By.ID, "submit_form").click()

# Wait for the post-login page to reach the complete ready state.
WebDriverWait(driver=driver, timeout=10).until(
    lambda d: d.execute_script("return document.readyState === 'complete'"))

error_message = "Incorrect username or password."
errors = driver.find_elements(By.CLASS_NAME, "flash-error")  # errors (if any)
if any(error_message in e.text for e in errors):
    # The error banner contains the failure message — login failed.
    print("[!] Login failed")
else:
    print("[+] Login successful")
    driver.get(internal_url)
    # Explicit wait instead of the original fixed time.sleep(5) calls.
    WebDriverWait(driver=driver, timeout=10).until(
        lambda d: d.execute_script("return document.readyState === 'complete'"))
    element = driver.find_element(By.XPATH, '/html/........./div/p')
    scraped_variable = element.get_attribute('innerHTML')
    print("scraped_variable:", scraped_variable)
I am working on a python web scraping project. The website I am trying to scrape data from contains info about all the medicines sold in India. The website requires a user to login before giving access to this information.
I want to access all the links in this url https://mims.com/india/browse/alphabet/a?cat=drug&tab=brand and store it in an array.
Here is my code for logging into the website
##################################### Method 1
import mechanize
import http.cookiejar as cookielib
from bs4 import BeautifulSoup
import html2text

# Configure a mechanize browser that behaves enough like a real one
# (cookies, redirects, referer, refresh handling) to pass the SSO flow.
br = mechanize.Browser()
cj = cookielib.LWPCookieJar()
br.set_cookiejar(cj)
br.set_handle_equiv(True)
br.set_handle_gzip(True)
br.set_handle_redirect(True)
br.set_handle_referer(True)
br.set_handle_robots(False)  # ignore robots.txt
# Follow meta-refresh redirects, but don't wait longer than 1s.
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
br.addheaders = [('User-agent', 'Chrome')]

br.open('https://sso.mims.com/Account/SignIn')

# View available forms
for f in br.forms():
    print(f)
br.select_form(nr=0)

# User credentials.
# NOTE(review): the original assigned the bare tokens <USERNAME> and
# <PASSWORD>, which is not valid Python — they must be quoted strings
# (or better, read from input()/environment variables).
br.form['EmailAddress'] = 'USERNAME'
br.form['Password'] = 'PASSWORD'

# Login
br.submit()
print(br.open('https://mims.com/india/browse/alphabet/a?cat=drug&tab=brand').read())
But the problem is that when the credentials are submitted, a middle page pops up with the following information.
You will be redirected to your destination shortly.
This page submits a hidden form and only then is the required end page shown. I want to access the end page. But br.open('https://mims.com/india/browse/alphabet/a?cat=drug&tab=brand').read() accesses the middle page and prints the results.
How do I wait for the middle page to submit the hidden form and then access the contents of the end page?
I've posted a selenium solution below, which works, but after understanding a bit more about the login process, it's possible to login using BeautifulSoup and requests only. Please read the comments on the code.
BeautifulSoup / requests solution
import requests
from bs4 import BeautifulSoup


def _form_fields(html_text):
    """Return (action_url, {name: value}) for the hidden 'openid_message'
    relogin form embedded in *html_text*."""
    soup = BeautifulSoup(html_text, "html.parser")
    form = soup.find('form', {"id": "openid_message"})
    fields = {}
    # `inp`, not `input`: don't shadow the builtin.
    for inp in form.find_all('input'):
        if inp.get('name'):
            fields[inp.get('name')] = inp.get('value')
    return form['action'], fields


# Credentials and the extra fields the sign-in form expects.
d = {
    "EmailAddress": "your@email.tld",
    "Password": "password",
    "RememberMe": True,
    "SubscriberId": "",
    "LicenseNumber": "",
    "CountryCode": "SG"
}

req = requests.Session()
login_u = "https://sso.mims.com/"
html = req.post(login_u, data=d)

products_url = "https://mims.com/india/browse/alphabet/a?cat=drug"
# The cookies generated on the previous request are reused automatically
# because we use a Session.
html = req.get(products_url)

# Here's the tricky part. The site uses 2 intermediary "relogin" pages that
# (theoretically) are only available with JavaScript enabled — each is just a
# hidden form we can extract and re-POST manually.
form_url, form_dict = _form_fields(html.text)
form_dict['submit_button'] = "Continue"
relogin = req.post(form_url, data=form_dict)

form_url, form_dict = _form_fields(relogin.text)
products_a = req.post(form_url, data=form_dict)
print(products_a.text)

# You can now request any url normally because the necessary cookies are
# already present on the current Session().
products_url = "https://mims.com/india/browse/alphabet/c?cat=drug"
products_c = req.get(products_url)
print(products_c.text)
Selenium solution
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

# One shared 10-second explicit wait for every element lookup below.
# NOTE(review): the original also imported Keys and sleep but never used
# them; both removed.
driver = webdriver.Firefox()
wait = WebDriverWait(driver, 10)
driver.maximize_window()

# Log in through the SSO page.
driver.get("https://sso.mims.com/")
el = wait.until(EC.element_to_be_clickable((By.ID, "EmailAddress")))
el.send_keys("your@email.com")
el = wait.until(EC.element_to_be_clickable((By.ID, "Password")))
el.send_keys("password")
el = wait.until(EC.element_to_be_clickable((By.ID, "btnSubmit")))
el.click()

# The profile header only appears after authentication — use it as the
# "logged in successfully" signal.
wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "profile-section-header")))

# Now the product listing page is reachable.
driver.get("http://mims.com/india/browse/alphabet/a?cat=drug")
wait.until(EC.visibility_of_element_located((By.CLASS_NAME, "searchicon")))
print(driver.page_source)
# do what you need with the source code
# do what you need with the source code
I have been trying to login to my instagram account with mechanize python for the past while and for some reason it is not working.
To check if I have logged in correctly, I decided to check the url with "br.geturl()", which should read "https://www.instagram.com/" once the login is successful, but after I run the program it is just:
"https://www.instagram.com/accounts/login/username=username_here&password=password_here"
Anyone know how to fix this?
Note: I know for sure my login info is correct.
Here is my code:
import mechanize

# Log in to Instagram with mechanize.
# NOTE(review): the original used Python 2 `print` statements, which are
# syntax errors on Python 3; converted to print() calls.
br = mechanize.Browser()
url = "https://www.instagram.com/accounts/login/"
br.set_handle_robots(False)  # ignore robots.txt
response = br.open(url)

# Select the first form on the page.
f = list(br.forms())
br.form = f[0]
print(br.form)

br.form["username"] = 'username_goes_here'
br.form["password"] = 'password_goes_here'
br.submit()

# On success this should print "https://www.instagram.com/".
print(br.geturl())
Since the form is generated via JavaScript, it cannot be found in the HTML. One way would be to use Selenium WebDriver.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service

# NOTE(review): the original XPaths used '#' where XPath attribute syntax
# requires '@' (e.g. //*[@name="username"]), which raises an invalid-selector
# error. find_element_by_xpath and the positional driver path were also
# removed in Selenium 4; modern equivalents used below.
driver = webdriver.Chrome(service=Service('/Usr/chromedriver'))
driver.get("https://www.instagram.com/accounts/login/?source=auth_switcher")

username = driver.find_element(By.XPATH, '//*[@name="username"]')
password = driver.find_element(By.XPATH, '//*[@name="password"]')
# NOTE(review): this class string is auto-generated by Instagram and changes
# between deploys — a brittle selector; presumably a button-type XPath would
# be more stable. TODO confirm.
login_btn = driver.find_element(By.XPATH, '//*[@class="oF4XW sqdOP L3NKy "]')

username.send_keys("username")
password.send_keys("password")
# Test to see if the input values are reflecting.
print(username.get_attribute('value'))
print(password.get_attribute('value'))

# Login
login_btn.click()

# Check whether we are logged in or not by the body class Instagram sets.
logged_in_class = driver.find_elements(By.CLASS_NAME, "logged-in")
not_logged_in_class = driver.find_elements(By.CLASS_NAME, "not-logged-in")
print(len(logged_in_class))
print(len(not_logged_in_class))
driver.quit()
I have a Python program that stores my login details. How would I get the program to connect to the Facebook login page, input my login details for me, log in, and return something?
E.g:
my details are in my main programme(Email and password), then I want to connect to Facebook, and have my program enter the details and send that off to Facebook.
Main Python File:
import urllib.parse
import urllib.request


def facebookDetails():
    """POST the hard-coded Facebook credentials and return the response body.

    NOTE(review): the original targeted Python 2 (urllib2), never returned
    anything, and posted to the homepage rather than the login endpoint.
    Facebook's login form also carries hidden anti-CSRF fields, so this
    request presumably will not actually log in — the official Graph API or
    browser automation is the reliable route. TODO confirm.
    """
    url = 'https://www.facebook.com/'
    values = {
        'email': 'somebody@facebook.com',
        'pass': 'password',
    }
    # urlopen requires the POST payload as bytes.
    data = urllib.parse.urlencode(values).encode('ascii')
    req = urllib.request.Request(url, data)
    # `with` closes the HTTP response even if read() raises.
    with urllib.request.urlopen(req) as response:
        the_page = response.read()
    return the_page
Here's an example using selenium:
from selenium import webdriver

if __name__ == '__main__':
    iuser = 'username'
    ipass = 'password'
    # You will also need to download PhantomJS.
    # NOTE(review): the original passed pathToPhantomJS.exe as a bare name —
    # the driver path must be a string. PhantomJS support was removed in
    # Selenium 4; presumably headless Chrome/Firefox is the modern
    # replacement. TODO confirm the Selenium version in use.
    driver = webdriver.PhantomJS('path/to/phantomjs.exe')
    driver.get('https://www.facebook.com/')

    # NOTE(review): the original XPaths used '#' where XPath attribute syntax
    # requires '@' (e.g. //*[@id="email"]); fixed below.
    email = driver.find_element_by_xpath('//*[@id="email"]')
    email.send_keys(iuser)
    password = driver.find_element_by_xpath('//*[@id="pass"]')
    password.send_keys(ipass)
    # 'u_0_n' is an auto-generated Facebook element id that changes between
    # sessions — brittle; a more stable selector is advisable. TODO confirm.
    login_button = driver.find_element_by_xpath('//*[@id="u_0_n"]')
    login_button.click()