I am using the following code to log in to a website with csrfmiddlewaretoken verification, but it throws me the following error:
"csrfmiddlewaretoken = HTML.find_all('input')[1]['value']
IndexError: list index out of range"
What do you think the problem is? I'm new to Python :)
import requests
from bs4 import BeautifulSoup
# Log in to a form protected by a csrfmiddlewaretoken hidden field, then
# fetch a page that requires the logged-in session.
request_url = 'https://text.gob.pe/accounts/login/'
with requests.session() as session:
    # Fetch the login page; the hidden token is read from its HTML.
    get_url = session.get(request_url)
    HTML = BeautifulSoup(get_url.text, 'html.parser')
    # This positional lookup is the line that raises the IndexError quoted
    # in the question: it needs at least two <input> elements in the page.
    csrfmiddlewaretoken = HTML.find_all('input')[1]['value']
    # logging in
    payload = {
        # 'next' was listed twice in the original dict; one entry is enough.
        'next': '/ profile /',
        'username': 'secret',
        'password': 'secret',
        'csrfmiddlewaretoken': csrfmiddlewaretoken,
    }
    headers = {
        'Referer': 'https://text.gob.pe/accounts/login/',
    }
    login_request = session.post(request_url, data=payload, headers=headers)
    home_page = session.get("https://text.gob.pe/ficha/buscar/")
    print(home_page.content)
Without a username and password, it is difficult to show exactly how to proceed. For now, your mistake is that you didn't send any request headers:
# Browser-like headers sent with the requests to the login endpoint.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'content-type': 'application/x-www-form-urlencoded',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
}
url = "https://logincovid19.minsa.gob.pe/accounts/login/"
session = requests.Session()
response = session.get(url, headers=headers)
# Look the hidden field up by its name instead of by position, and fail with
# a clear message if the page does not contain it.
token_input = BeautifulSoup(response.text, 'lxml').find('input', {'name': 'csrfmiddlewaretoken'})
if token_input is None:
    raise RuntimeError('csrfmiddlewaretoken input not found on the login page')
csrfmiddlewaretoken = token_input.get('value')
Now csrfmiddlewaretoken is something like this - eyHLYFv7HOYxglzFS9a3JDxOT38u8mrakdwhatOnkvcJJzwN9dNi6olBxJxD1HZi
I think the rest of the code will look like this, but you will need to check it:
user = 'YourUserNmaeHere'
password = 'UserPaswordHere'
# Pass the form fields as a dict so that requests URL-encodes them. A
# hand-built "a=b&c=d" string breaks as soon as a value contains characters
# such as "&", "=", "+" or "%".
payload = {
    'csrfmiddlewaretoken': csrfmiddlewaretoken,
    'username': user,
    'password': password,
}
response = session.post(url, data=payload, headers=headers)
Related
I'm trying to scrape all the follower names from a profile page using requests module. The problem is when I run the script below, I get the first 20 names over and over again.
The parameters used in post requests only have two keys and values like size:20 and continuation:timestamp. I tried to use the parameters in the right way but still I get the same results repeatedly.
import time
import requests
# Repeatedly POST to the followers endpoint and print each follower's name.
link = 'https://api-mainnet.rarible.com/marketplace/api/v4/followers'
params = {'user': '0xe744d23107c9c98df5311ff8c1c8637ec3ecf9f3'}
payload = {"size": 20}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
    'origin': 'https://rarible.com',
    'referer': 'https://rarible.com/',
}
with requests.Session() as s:
    s.headers.update(headers)
    while True:
        res = s.post(link, params=params, json=payload)
        print(s.headers)
        for item in res.json():
            print(item['owner'].get('name', ''))
        # The continuation value is set to the current time in milliseconds;
        # this is the part the question reports as returning the same
        # 20 names on every iteration.
        payload['continuation'] = f"{int(time.time() * 1000)}"
        time.sleep(2)
How can I parse all the follower names from that page using requests?
Your next continuation value is in X-CONTINUATION response header, so this will work when increasing size in payload doesn't:
import requests
# Page through the followers endpoint, using the X-CONTINUATION response
# header of each page as the "continuation" value of the next request.
link = 'https://api-mainnet.rarible.com/marketplace/api/v4/followers'
params = {'user': '0xe744d23107c9c98df5311ff8c1c8637ec3ecf9f3'}
payload = {"size": 20}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
    'origin': 'https://rarible.com',
    'referer': 'https://rarible.com/',
}
res = requests.post(link, headers=headers, params=params, json=payload)
print(res.headers.get("X-CONTINUATION"))
while True:
    for item in res.json():
        print(item['owner'].get('name', ''))
    # .get() instead of [...]: a response without the header ends the loop
    # cleanly instead of raising KeyError.
    continuation = res.headers.get("X-CONTINUATION")
    if not continuation:
        break
    payload['continuation'] = continuation
    res = requests.post(link, headers=headers, params=params, json=payload)
Some APIs may block you from extracting more than a certain number of values, and may also return results in pages with a size limit.
For me, just increasing the size in the payload worked with your code.
import time
import requests
# Request all followers in a single call by asking for a large page size.
link = 'https://api-mainnet.rarible.com/marketplace/api/v4/followers'
params = {'user': '0xe744d23107c9c98df5311ff8c1c8637ec3ecf9f3'}
payload = {"size": 10000}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36',
    'origin': 'https://rarible.com',
    'referer': 'https://rarible.com/',
}
with requests.Session() as s:
    s.headers.update(headers)
    res = s.post(link, params=params, json=payload)
    # Decode the JSON body once and reuse it for the count and the loop.
    followers = res.json()
    print(len(followers))
    for item in followers:
        print(item['owner'].get('name', ''))
I'm trying to log-in to a website using Python requests. My setup is:
# Browser-like request headers for the naturalpartners.com login form.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'content-type': 'application/x-www-form-urlencoded',
    'origin': 'https://www.naturalpartners.com',
    'referer': 'https://www.naturalpartners.com/us/account/login.jsp',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36',
}
# Form fields posted to the login handler. Every handler field is followed
# by a companion key with the same name prefixed by "_D:".
payload = {
    '_dyncharset': 'UTF-8',
    '_dynSessConf': '',  # filled in later from the login page
    '/atg/userprofiling/ProfileFormHandler.flowName': 'B2B',
    '_D:/atg/userprofiling/ProfileFormHandler.flowName': ' ',
    '/atg/userprofiling/ProfileFormHandler.loginSuccessURL': '/us/index.jsp?',
    '_D:/atg/userprofiling/ProfileFormHandler.loginSuccessURL': ' ',
    '/atg/userprofiling/ProfileFormHandler.loginErrorURL': '/us/account/login.jsp?',
    '_D:/atg/userprofiling/ProfileFormHandler.loginErrorURL': ' ',
    '/atg/userprofiling/ProfileFormHandler.value.login': '__UsernameHidden__',
    '_D:/atg/userprofiling/ProfileFormHandler.value.login': ' ',
    '/atg/userprofiling/ProfileFormHandler.value.password': '__PasswordHidden__',
    '_D:/atg/userprofiling/ProfileFormHandler.value.password': ' ',
    '/atg/userprofiling/ProfileFormHandler.login': 'Sign In',
    # The original repeated the '_D:...value.login' key here, which silently
    # overwrote the earlier entry and left the submit field without its
    # companion. NOTE(review): key name inferred from the pairing pattern
    # above - confirm against the real form's hidden inputs.
    '_D:/atg/userprofiling/ProfileFormHandler.login': ' ',
    '_DARGS': '/us/account/login.jsp',
}
My code to execute the login is:
from requests_html import HTMLSession
session = HTMLSession()  # new session
url = 'https://www.naturalpartners.com/us/account/login.jsp?_DARGS=/us/account/login.jsp'
r = session.get('https://www.naturalpartners.com/us/account/login.jsp', headers=headers)  # get login page
# XPath addresses attributes with "@"; the "#name" / "#value" form in the
# original is not valid XPath.
dyn = r.html.xpath('//input[@name="_dynSessConf"]/@value')[0]  # store _dynSessConf
payload['_dynSessConf'] = dyn  # assign to payload
# NOTE(review): probably unnecessary - the session object should already
# resend the cookies it received on the GET above; confirm before relying on
# this raw Set-Cookie copy.
headers['cookie'] = r.headers['Set-Cookie']  # attempt to set cookie value
post = session.post(url, data=payload, headers=headers)
My issue is I'm getting a 200 response as opposed to a 302 which would signal a successful login. The post seems to be completing the form with the username and password but not actually submitting it. So the login doesn't actually take place. I'm not sure what step I'm missing. Thank you
I'm trying to compose a post request for uploading a photo to Instagram, but the third request returns a 403 error to "instagram.com/create/configure/"
My code:
import re
import requests
import urllib.request
from datetime import datetime
# Three-step flow: (1) log in, (2) upload a photo, (3) configure the post.
link = 'https://www.instagram.com/accounts/login/'
login_url = 'https://www.instagram.com/accounts/login/ajax/'
login = 'login_name'
password = '0000'
time = int(datetime.now().timestamp())
print(time)
payload = {
    'username': login,
    'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{time}:{password}',
    'queryParams': {},
    'optIntoOneTap': 'false',
}
with requests.Session() as s:
    # Step 1: load the login page to obtain the CSRF token, then log in.
    r = s.get(link)
    a = r.cookies['csrftoken']
    print(r.cookies)
    print(a)
    csrf = re.findall(r"csrf_token\":\"(.*?)\"", r.text)[0]
    print(csrf)
    r = s.post(login_url, data=payload, headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "https://www.instagram.com/accounts/login/",
        "x-csrftoken": csrf,
    })
    print(r.status_code)
    print(r.url)
    print(r.text)
    print(s.cookies)
    r = s.get('https://www.instagram.com/accounts/edit/')
    print(login in r.text)

    # Step 2: upload the image bytes.
    microtime = int(datetime.now().timestamp())
    headers = {
        # Was "image / jpg"; a MIME type cannot contain spaces.
        "content-type": "image/jpg",
        "X-Entity-Name": f"fb_uploader_{microtime}",
        "Offset": "0",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
        "x-entity-length": "299255",
        "X-Instagram-Rupload-Params": f'{{"media_type": 1, "upload_id": {microtime}, "upload_media_height": 1080, "upload_media_width": 1080}}',
        "x-csrftoken": csrf,
        "x-ig-app-id": "1217981644879628",
    }
    # NOTE(review): `photo` is downloaded but the request below uploads the
    # local file 4.jpg instead - confirm which source is intended.
    with urllib.request.urlopen('https://sun9-64.userapi.com/RVgUHSq9fXrDr8YBJ4a4h9xwN4EQA_8BXuQ5Vg/Mdx3LwawEmY.jpg') as img:
        photo = img.read()
    # Open the file in a with-block so the handle is closed after the upload.
    with open("4.jpg", "rb") as image_file:
        r = s.post(f'https://www.instagram.com/rupload_igphoto/fb_uploader_{microtime}', data=image_file, headers=headers)
    print(r.text)

    # Step 3: configure (publish) the uploaded photo.
    # No hard-coded Content-Length here: requests computes it from the
    # encoded form body.
    headers = {
        'content-type': 'application/x-www-form-urlencoded',
        "origin": "https://www.instagram.com",
        "referer": "https://www.instagram.com/create/details/",
        'user-agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
        "x-csrftoken": csrf,
        "x-ig-app-id": "1217981644879628",
        "X-Requested-With": "XMLHttpRequest",
    }
    body = {
        'upload_id': f'{microtime}',
        "caption": '00000000000000000000',
        'usertags': '',
        'custom_accessibility_caption': '',
        'retry_timeout': '',
    }
    r = s.post('https://www.instagram.com/create/configure/', data=body, headers=headers)
    print(r.status_code)
    print(r.text)
Process:
First authorization, everything is okay with that
The second request is to upload a photo, everything is ok too
The third request, with the parameters of the photo, there is an error ...
error 403 is returned
You need to give the csrf_token and session_id as cookies in the third request.
you can find the complete guide here:
Share a post into your Instagram account using the requests library
I'm trying to get specific data from a JSON response from Spotify. My current code is:
# Log in to Spotify and print the displayName field of the login response.
with requests.Session() as c:
    # Load the login page to obtain the csrf_token cookie.
    url = 'https://accounts.spotify.com/en/login?continue=https:%2F%2Fwww.spotify.com%2Fint%2Faccount%2Foverview%2F'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    page = c.get(url, headers=headers)
    CSRF = page.cookies['csrf_token']

    # Post the credentials together with the token.
    headers = {
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_0_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Mobile/14A403 Safari/602.1',
        'Referer': 'https://accounts.spotify.com/en/login/?continue=https:%2F%2Fwww.spotify.com%2Fus%2Fgooglehome%2Fregister%2F&_locale=en-US',
    }
    url = 'https://accounts.spotify.com/api/login'
    login_data = {
        'remember': 'true',
        'username': USER,
        'password': PASS,
        'csrf_token': CSRF,
    }
    cookies = dict(__bon='MHwwfC0xNDAxNTMwNDkzfC01ODg2NDI4MDcwNnwxfDF8MXwx')
    login = c.post(url, headers=headers, data=login_data, cookies=cookies)
    if '{"displayName":"' in login.text:
        url = 'https://www.spotify.com/us/account/overview/'
        capture = c.get(url, headers=headers)
        csr = capture.headers['X-Csrf-Token']
        # Response.json() decodes the body directly, so the snippet does not
        # depend on a separate `import json`.
        result = login.json()['displayName']
        print(result)
when I run this
{"displayName":"John Doe",
"smallImageUrl":"https://scontent.xx.fbcdn.net/v/t1.0-1/p50x50/10407982_10104564418730171_2968639978505808989_n.jpg?_nc_cat=110\u0026_nc_oc=AQmoXCg0tfbf9LuxGWAbpEv-96K57xmie4S3avDrYh3l90g8W-ParNV5mNK0oPU6ERk\u0026_nc_ht=scontent.xx\u0026oh=2fe2149364f012a3c5e43e6d999375ab\u0026oe=5DBD8940",
"largeImageUrl":"https://scontent.xx.fbcdn.net/v/t1.0-1/p200x200/10407982_10104564418730171_2968639978505808989_n.jpg?_nc_cat=110\u0026_nc_oc=AQmoXCg0tfbf9LuxGWAbpEv-96K57xmie4S3avDrYh3l90g8W-ParNV5mNK0oPU6ERk\u0026_nc_ht=scontent.xx\u0026oh=3aec23a8c56be536739ba4cca4e1cc6e\u0026oe=5DBC409D"}
I would like to print only the value: John Doe (SOLVED). But how do I put the John Doe value into a field for a Discord bot?
embed=discord.Embed()
embed.add_field(name=undefined, value=undefined, inline=False)
await self.bot.say(embed=embed)
you can use the json method of the requests response object.
login = c.post(url, headers=headers, data=login_data, cookies=cookies)
# Response.json() parses the body into a dict.
login_json = login.json()
if "displayName" in login_json:
    print(login_json["displayName"])
Use json library
import json
# Sample response body (in the real script this would likely be login.text).
# Adjacent string literals are concatenated into one string.
s = (
    '{"displayName":"John Doe",'
    '"smallImageUrl":"https://scontent.xx.fbcdn.net/v/t1.0-1/p50x50/'
    '10407982_10104564418730171_2968639978505808989_n.jpg'
    '?_nc_cat=110\u0026_nc_oc=AQmoXCg0tfbf9LuxGWAbpEv-96K57xmie4S3avDrYh3l90g8W-ParNV5mNK0oPU6ERk'
    '\u0026_nc_ht=scontent.xx\u0026oh=2fe2149364f012a3c5e43e6d999375ab\u0026oe=5DBD8940",'
    '"largeImageUrl":"https://scontent.xx.fbcdn.net/v/t1.0-1/p200x200/'
    '10407982_10104564418730171_2968639978505808989_n.jpg'
    '?_nc_cat=110\u0026_nc_oc=AQmoXCg0tfbf9LuxGWAbpEv-96K57xmie4S3avDrYh3l90g8W-ParNV5mNK0oPU6ERk'
    '\u0026_nc_ht=scontent.xx\u0026oh=3aec23a8c56be536739ba4cca4e1cc6e\u0026oe=5DBC409D"}'
)
# Decode the JSON text into a dict, then pull out the one field of interest.
dj = json.loads(s)
display_name = dj["displayName"]
print(display_name)
OUTPUT
John Doe
Please help me deal with authorization through a script. The problem is that I cannot get the correct invbf_session_id inserted automatically into the GET request.
import pprint
import requests
import re
import shelve
import bs4
def scanning_posts():
    """Log in to forum.saransk.ru and print whether the login succeeded.

    The front page is fetched to read the hidden ``auth_key`` form value,
    then the credentials are sent in a second request. Success is judged by
    the absence of the ``sign_in`` link in the second response.
    """
    print('------------------------enter begin--------------------')
    url = 'http://forum.saransk.ru/'
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36',
        'Referer': 'http://forum.saransk.ru/forum/',
    }
    # One Session for both requests: it stores the invbf_session_id cookie
    # returned by the first response and resends it with the login request,
    # so no cookie value has to be copied from the browser by hand.
    with requests.Session() as session:
        # Same headers on both requests.
        # NOTE(review): the forum may tie a session to its user agent - confirm.
        session.headers.update(header)
        html = session.get(url)
        pprint.pprint(html.headers)
        pprint.pprint(session.cookies.get('invbf_session_id'))
        # Name the parser explicitly so the result does not depend on which
        # parser libraries happen to be installed.
        soup = bs4.BeautifulSoup(html.text, 'html.parser')
        hiddenAuthKey = soup.find('input', {'name': 'auth_key'})['value']
        authData = {
            'ips_username': 'xxxxxx',
            'ips_password': 'yyyyyy',
            'auth_key': hiddenAuthKey,
            'rememberMe': 1,
            'referer': 'http://forum.saransk.ru/',
        }
        # pprint.pprint(authData)
        print('\tlogin: ', authData['ips_username'])
        req = session.get(url, params=authData)
        soup = bs4.BeautifulSoup(req.text, 'html.parser')
        signLinkNotLogged = soup.find('a', {'id': 'sign_in'})
    if signLinkNotLogged:
        print('------------------------enter failed--------------------')
    else:
        print('------------------------enter successful--------------------')


scanning_posts()
After running, the script displays a wrong invbf_session_id value, different from the one shown in Firefox Firebug, so authorization fails.
If I copy the invbf_session_id value from Firefox Firebug and paste it into the script, authorization succeeds.