Trying to log in to MyFitnessPal via requests: what am I missing? - python

I am trying to access my account on MyFitnessPal in order to download my own food diaries. However, whenever I run the following code, I am consistently redirected to the login page. What am I missing? In the HTML code for the login page, I only see two input tags, one for "email" and one for "password", both of which I'm making sure to supply. I'm pretty new to web scraping, so any advice would be appreciated!
import datetime  # BUG FIX: used below but never imported in the original

import requests
from bs4 import BeautifulSoup

# Save relevant urls
base_url = 'https://www.myfitnesspal.com'
login_action = '/account/login'
login_url = base_url + login_action

# Build the printable-diary URL for a single day.
date = datetime.datetime(2022, 3, 13)
fmt_date = date.strftime('%Y-%m-%d')
food_url = base_url + '/reports/printable_diary/?from=' + fmt_date + '&to=' + fmt_date

# Placeholders — fill in real values before running.
# (The original `{user agent}` / `{email}` style braces are not valid Python.)
headers = {'user-agent': 'YOUR_USER_AGENT'}
credentials = {'email': 'YOUR_EMAIL', 'password': 'YOUR_PASSWORD'}

s = requests.session()
# BUG FIX: the original line ended with '}' instead of ')' — a SyntaxError.
login = s.post(login_url, headers=headers, data=credentials)
r = s.get(food_url, headers=headers)
soup = BeautifulSoup(r.text, 'html.parser')
print(soup.prettify())
What ends up getting printed is the HTML from the login page. (I have confirmed this by also printing the login page's HTML.)

Try this: r = requests.get('https://www.myfitnesspal.com/account/login', auth= ('email', 'password'))
I got a 200 response with this.

Related

Beautifulsoup Facebook Login

I am trying to use Beautifulsoup to scrape the post data by using the below code,
but I found that BeautifulSoup fails to log in, which causes the scraper to return the text of all the posts, including the header message (the text that asks you to log in).
How can I modify the code so that it returns info only for the specific post with that ID, rather than info for all posts? Thanks!
import requests
from bs4 import BeautifulSoup


class faceBookBot():
    """Log in to Facebook via mbasic and scrape a single post's paragraphs.

    NOTE(review): a bare form POST of email/pass generally fails —
    Facebook's login form carries extra hidden tokens not supplied here.
    """

    login_basic_url = "https://mbasic.facebook.com/login"
    login_mobile_url = 'https://m.facebook.com/login'
    payload = {
        'email': 'XXXX#gmail.com',
        'pass': "XXXX"
    }
    post_ID = ""

    # login to facebook and redirect to the link with specific post
    def parse_html(self, request_url):
        """POST credentials, then GET `request_url` within the same session."""
        with requests.Session() as session:
            post = session.post(self.login_basic_url, data=self.payload)
            parsed_html = session.get(request_url)
            return parsed_html

    # scrape the post: all <p> tags hold the paragraph/content part
    def post_content(self):
        """Return the post's paragraph texts joined into one string."""
        REQUEST_URL = f'https://m.facebook.com/story.php?story_fbid={self.post_ID}&id=7724542745'
        soup = BeautifulSoup(self.parse_html(REQUEST_URL).content, "html.parser")
        content = soup.find_all('p')
        # Comprehension + join instead of the original append loop.
        return ' '.join(lines.text for lines in content)


bot = faceBookBot()
bot.post_ID = "10158200911252746"
You can't: Facebook encrypts the password, and you don't have the encryption they use, so the server will never accept it. Save your time and find another way.
@AnsonChan yes, you could open the page with Selenium, log in, and then copy its cookies to requests:
from selenium import webdriver
import requests

driver = webdriver.Chrome()
driver.get('http://facebook.com')
# login manually, or automate it.
# when logged in:
session = requests.session()
# Copy the browser's authenticated cookies into the requests session.
# FIX: the original used a list comprehension purely for its side
# effects, which is an anti-pattern — a plain loop is the right tool.
for cookie in driver.get_cookies():
    session.cookies.update({cookie['name']: cookie['value']})
driver.quit()
# get the page you want with requests
response = session.get('https://m.facebook.com/story.php?story_fbid=123456789')

Not able to log in to site and scrape data

I'm trying to scrape the site's data, but I'm facing an issue while logging in. When I try to log in to the site with my username and password, it does not succeed.
I think there is an issue with the token, every time I try to login to the system a token is generated(check in the console headers)
import requests
from bs4 import BeautifulSoup

url = "http://indiatechnoborate.tymra.com"

# FIX: the original created a throwaway `s = requests.session()` that was
# immediately shadowed by the `with` block below — removed.
with requests.Session() as s:
    first = s.get(url)
    start_soup = BeautifulSoup(first.content, 'lxml')
    print(start_soup)
    # The Joomla login form carries a hidden 'return' token plus one
    # hidden field whose *name* is generated per request.
    retVal = start_soup.find("input", {"name": "return"}).get('value')
    print(retVal)
    formdata = start_soup.find("form", {"id": "form-login"})
    dynval = formdata.find_all('input', {"type": "hidden"})[1].get('name')
    print(dynval)
    dictdata = {"username": "username", "password": "password",
                "return": retVal, dynval: "1"}
    print(dictdata)
    pr = {"task": "user.login"}
    print(pr)
    sec = s.post("http://indiatechnoborate.tymra.com/component/users/",
                 data=dictdata, params=pr)
    print("------------------------------------------")
    print(sec.status_code, sec.url)
    print(sec.text)
I want to log in to the site and want to get the data after login is done
try replacing this line:
dictdata={"username":"username", "password":"password","return":retVal,dynval:"1"}
with this one:
dictdata={"username":"username", "password":"password","return":retVal + "==",dynval:"1"}
hope this helps
Try to use authentication methods instead of passing in payload
import requests
from requests.auth import HTTPBasicAuth

# Credentials for HTTP Basic authentication (fill in before running).
USERNAME = "<USERNAME>"
PASSWORD = "<PASSWORD>"

BASIC_AUTH = HTTPBasicAuth(USERNAME, PASSWORD)
LOGIN_URL = "http://indiatechnoborate.tymra.com"

# Issue the GET with the Basic-Auth header attached by requests.
response = requests.get(LOGIN_URL, headers={}, auth=BASIC_AUTH)

Unable to login to indeed.com using python requests

I'm trying to write a code to collect resumes from "indeed.com" website.
In order to download resumes from "indeed.com" you have to login with your account.
The problem is that after posting the data it shows me response [200], which indicates a successful post, but it still fails to log in.
Here is my code :
import requests
from bs4 import BeautifulSoup
from lxml import html

# Fetch the login page; Indeed embeds several anti-CSRF tokens in an
# inline JSON blob, which the raw-offset slicing below extracts.
page = requests.get('https://secure.indeed.com/account/login')
soup = BeautifulSoup(page.content, 'html.parser')
row_text = soup.text
surftok = str(row_text[row_text.find('"surftok":')+11:row_text.find('","tmpl":')])
formtok = str(row_text[row_text.find('"tk":') + 6:row_text.find('","variation":')])
logintok = str(row_text[row_text.find('"loginTk":') + 11:row_text.find('","debugBarLink":')])
cfb = int(str(row_text[row_text.find('"cfb":')+6:row_text.find(',"pvr":')]))
pvr = int(str(row_text[row_text.find('"pvr":') + 6:row_text.find(',"obo":')]))
hl = str(row_text[row_text.find('"hl":') + 6:row_text.find('","co":')])
data = {
    'action': 'login',
    '__email': 'myEmail',
    '__password': 'myPassword',
    'remember': '1',
    'hl': hl,
    'cfb': cfb,
    'pvr': pvr,
    'form_tk': formtok,
    'surftok': surftok,
    'login_tk': logintok
}
response = requests.post("https://secure.indeed.com/", data=data)
# FIX: Python 3 print function — the original used Python 2 print
# statements, a SyntaxError under Python 3.
print(response)
print('myEmail' in response.text)
It shows me response [200] but when I search for my email in the response page to make sure that login is successful, I don't find it. It seems that login failed for a reason that I don't know.
send headers as well in your post request, get the headers from response headers of your browser.
headers = {'user-agent': 'Chrome'}
response = requests.post("https://secure.indeed.com/",headers = headers, data=data)
Some websites use JavaScript redirection. "indeed.com" is one of them. Unfortunately, Python requests does not support JavaScript redirection. In such situations we may use Selenium.

Login to aspx website using python requests

I'm trying to log into my school website that utilizes aspx with requests in order to scrape some data. My problem is similar to the one described here:
Log in to ASP website using Python's Requests module
However, my form also requires SubmitButton.x and SubmitButton.y and I don't know where to get them from. I tried to pass in values that worked in a manual login, but it didn't work.
Here's the page
form data from successful manual login
from bs4 import BeautifulSoup
import requests

# The GET and the POST in the original both used this identical (very
# long) ADFS WS-Federation URL — hoisted into one constant.
LOGIN_URL = 'https://adfslight.resman.pl/LoginPage.aspx?ReturnUrl=%2f%3fwa%3dwsignin1.0%26wtrealm%3dhttps%253a%252f%252fcufs.resman.pl%253a443%252frzeszow%252fAccount%252fLogOn%26wctx%3drm%253d0%2526id%253dADFS%2526ru%253d%25252frzeszow%25252fFS%25252fLS%25253fwa%25253dwsignin1.0%252526wtrealm%25253dhttps%2525253a%2525252f%2525252fuonetplus.resman.pl%2525252frzeszow%2525252fLoginEndpoint.aspx%252526wctx%25253dhttps%2525253a%2525252f%2525252fuonetplus.resman.pl%2525252frzeszow%2525252fLoginEndpoint.aspx%26wct%3d2018-02-04T18%253a08%253a18Z&wa=wsignin1.0&wtrealm=https%3a%2f%2fcufs.resman.pl%3a443%2frzeszow%2fAccount%2fLogOn&wctx=rm%3d0%26id%3dADFS%26ru%3d%252frzeszow%252fFS%252fLS%253fwa%253dwsignin1.0%2526wtrealm%253dhttps%25253a%25252f%25252fuonetplus.resman.pl%25252frzeszow%25252fLoginEndpoint.aspx%2526wctx%253dhttps%25253a%25252f%25252fuonetplus.resman.pl%25252frzeszow%25252fLoginEndpoint.aspx&wct=2018-02-04T18%3a08%3a18Z'

data = {}
with requests.Session() as s:
    page = s.get(LOGIN_URL).content
    soup = BeautifulSoup(page, "lxml")
    data["__EVENTTARGET"] = ""
    data["__EVENTARGUMENT"] = ""
    # BUG FIX: the original key was '___VIEWSTATE' (THREE underscores);
    # ASP.NET expects '__VIEWSTATE', so the server rejected the postback.
    data["__VIEWSTATE"] = soup.select_one("#__VIEWSTATE")["value"]
    data["__VIEWSTATEGENERATOR"] = soup.select_one("#__VIEWSTATEGENERATOR")["value"]
    data["__EVENTVALIDATION"] = soup.select_one("#__EVENTVALIDATION")["value"]
    data["UsernameTextBox"] = "myusername"
    data["PasswordTextBox"] = "mypassword"
    # Image-button click coordinates; any plausible values work.
    data["SubmitButton.x"] = "49"
    data["SubmitButton.y"] = "1"
    s.post(LOGIN_URL, data=data)
    open_page = s.get("https://uonetplus.resman.pl/rzeszow/Start.mvc/Index")
    print(open_page.text)

Fill and submit html form

I am trying / wanting to write a Python script (2.7) that goes to a form on a website (with the name "form1") and fills in the first input-field in said form with the word hello, the second input-field with the word Ronald, and the third field with ronaldG54@gmail.com
Can anyone help me code or give me any tips or pointers on how to do this ?
Aside from Mechanize and Selenium David has mentioned, it can also be achieved with Requests and BeautifulSoup.
To be more clear, use Requests to send request to and retrieve responses from server, and use BeautifulSoup to parse the response html to know what parameters to send to the server.
Here is an example script I wrote that uses both Requests and BeautifulSoup to submit username and password to login to wikipedia:
import requests
from bs4 import BeautifulSoup as bs


def get_login_token(raw_resp):
    """Extract the hidden 'wpLoginToken' value from the login-page HTML."""
    soup = bs(raw_resp.text, 'lxml')
    # FIX: use .get('name') — n['name'] raises KeyError for any <input>
    # that has no name attribute.
    token = [n['value'] for n in soup.find_all('input')
             if n.get('name') == 'wpLoginToken']
    return token[0]


payload = {
    'wpName': 'my_username',
    'wpPassword': 'my_password',
    'wpLoginAttempt': 'Log in',
    #'wpLoginToken': '',
}

with requests.session() as s:
    # GET the login page first so the per-session token can be scraped.
    resp = s.get('http://en.wikipedia.org/w/index.php?title=Special:UserLogin')
    payload['wpLoginToken'] = get_login_token(resp)
    response_post = s.post('http://en.wikipedia.org/w/index.php?title=Special:UserLogin&action=submitlogin&type=login',
                           data=payload)
    response = s.get('http://en.wikipedia.org/wiki/Special:Watchlist')
Update:
For your specific case, here is the working code:
import requests
from bs4 import BeautifulSoup as bs


def get_session_id(raw_resp):
    """Pull the hidden 'survey_session_id' input value out of the form HTML."""
    soup = bs(raw_resp.text, 'lxml')
    token = soup.find_all('input', {'name': 'survey_session_id'})[0]['value']
    return token


payload = {
    'f213054909': 'o213118718',  # 21st checkbox
    'f213054910': 'Ronald',  # first input-field
    'f213054911': 'ronaldG54#gmail.com',
}

url = r'https://app.e2ma.net/app2/survey/39047/213008231/f2e46b57c8/?v=a'

with requests.session() as s:
    resp = s.get(url)
    payload['survey_session_id'] = get_session_id(resp)
    response_post = s.post(url, data=payload)
    # FIX: Python 3 print function — the original used the Python 2
    # print statement.
    print(response_post.text)
Take a look at Mechanize and Selenium. Both are excellent pieces of software that would allow you to automate filling and submitting a form, among other browser tasks.

Categories