I'm trying to use a Python script to log in to a website,
but it tells me "CSRF verification failed. Request aborted." The python-requests code is as follows:
import requests

# Fix for "CSRF verification failed. Request aborted.": the login form must be
# loaded first so the server can issue a CSRF token, and that token must be
# sent back with the POST together with a Referer header.
LOGIN_URL = 'https://pruebita.gob.pe/accounts/login/'

payload = {
    'inUserName': 'NAN',
    'inUserPass': 'NAN',
}
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
    ),
    # The page URL belongs in Referer; the "From" header used previously is
    # meant for an e-mail address and is not consulted by CSRF checks.
    'Referer': LOGIN_URL,
}

with requests.Session() as s:
    # Fetch the login page so the session's cookie jar receives the CSRF cookie.
    s.get(LOGIN_URL, headers=headers)

    # NOTE(review): the error text and the /accounts/login/ path look like
    # Django, whose defaults are a "csrftoken" cookie echoed back in a
    # "csrfmiddlewaretoken" form field -- confirm against the real login form.
    token = s.cookies.get('csrftoken')
    if token:
        payload['csrfmiddlewaretoken'] = token

    p = s.post(LOGIN_URL, data=payload, headers=headers)
    print(p.text)

    r = s.get('A protected web page url')
    print(r.text)
Related
I would like to scrape tabular content from the landing page of this website. There are 100 rows on its first page. When I observe network activity in dev tools, I notice that GET requests are being issued to this url https://io6.dexscreener.io/u/ws3/screener3/ with appropriate parameters, which end up producing JSON content.
However, when I try to mimic that request with the following code:
import requests

# Endpoint and query string as observed in the browser's network tab.
url = 'https://io6.dexscreener.io/u/ws3/screener3/'
params = dict(
    EIO='4',
    transport='polling',
    t='NwYSrFK',
    sid='ztAOHWOb-1ulTq-0AQwi',
)
headers = {
    'accept': '*/*',
    'referer': 'https://dexscreener.com/',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
}

with requests.Session() as session:
    # Install the browser-like headers once on the session, then issue the GET.
    session.headers.update(headers)
    response = session.get(url, params=params)
    print(response.content)
I get this response:
`{"code":3,"message":"Bad request"}`
How can I get response having tabular content from that webpage?
Here is a very quick and dirty piece of python code that does the initial handshake and sets up the websocket connection and downloads the data in json format infinitely. I haven't tested this code extensively and I am not sure exactly what is necessary or not (in terms of the steps in the handshake) but I have mimicked the browser behaviour and it seems to work fine:
import json

import requests
from websocket import create_connection

# Namespace of the screener feed; '40<ns>,' joins it and '42<ns>,' prefixes
# every event message that the server sends for it.
NAMESPACE = '/u/ws/screener/consolidated/platform/ethereum/h1/top/1'
EVENT_PREFIX = f'42{NAMESPACE},'
# Engine.IO v4 separates the packets of one polling response with this
# record-separator character.
PACKET_SEPARATOR = '\x1e'

s = requests.Session()
headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'}
url = 'https://dexscreener.com/ethereum'
resp = s.get(url, headers=headers)
print(resp)

# Polling handshake, mimicking the browser: the second response carries the
# session id ("sid") that every later request must present.
step1 = s.get('https://io3.dexscreener.io/u/ws3/screener3/?EIO=4&transport=polling&t=Nwof-Os')
step2 = s.get('https://io4.dexscreener.io/u/ws3/screener3/?EIO=4&transport=polling&t=Nwof-S5')
obj = json.loads(step2.text[1:])  # drop the one-character packet-type prefix
code = obj['sid']

# Join the namespace, then poll once for the initial snapshot.
payload = f'40{NAMESPACE},'
step3 = s.post(f'https://io4.dexscreener.io/u/ws3/screener3/?EIO=4&transport=polling&t=Nwof-Xt&sid={code}', data=payload)
step4 = s.get(f'https://io4.dexscreener.io/u/ws3/screener3/?EIO=4&transport=polling&t=Nwof-Xu&sid={code}')

# Remove the packet separators and prefixes so the events sit back to back;
# the "screener" payload is then whatever lies between the two markers below.
d = step4.text.replace(PACKET_SEPARATOR, '').replace(EVENT_PREFIX, '').replace(payload, '')
start = '["screener",'
end = ']["latestBlock",'
dirty = d[d.find(start) + len(start):d.rfind(end)].strip()
clean = json.loads(dirty)
print(clean)

# Extra headers for the websocket upgrade request. They are passed as a dict
# through the `header` option. Handshake-specific headers (Host, Upgrade,
# Connection, Sec-WebSocket-*) are left to the websocket library, which
# generates matching values itself.
headers = {
    'Accept-Language':'en-ZA,en;q=0.9,en-GB;q=0.8,en-US;q=0.7,de;q=0.6',
    'Cache-Control':'no-cache',
    'Origin':'https://dexscreener.com',
    'Pragma':'no-cache',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36'
}

# Then create a connection to the tunnel
ws = create_connection(f"wss://io4.dexscreener.io/u/ws3/screener3/?EIO=4&transport=websocket&sid={code}", header=headers)
try:
    # Upgrade the polling session to the websocket transport.
    ws.send('2probe')
    ws.send('5')

    # Print every event until the connection drops or the user hits Ctrl-C;
    # both now end the loop instead of being silently swallowed.
    while True:
        message = ws.recv()
        if not isinstance(message, str):
            continue  # binary frame: nothing to decode here
        if message == '2':
            # Engine.IO v4 heartbeat: answer the server's ping with a pong,
            # otherwise the server closes the connection after its timeout.
            ws.send('3')
            continue
        if not message.startswith(EVENT_PREFIX):
            continue  # control packet (e.g. '3probe'), not a feed event
        try:
            json_data = json.loads(message[len(EVENT_PREFIX):])
        except json.JSONDecodeError:
            continue  # skip a malformed event, keep streaming
        print(json_data)
finally:
    ws.close()
In the request headers when logging in, there's a header called "cookie" that changes every time. How would I grab it each time and put it in the headers using python requests?
screenshot of network tab in chrome
Here's my code:
import time

import requests

# Proxy mapping handed to every request below (only an "http" entry is defined).
proxies = {
    "http": "http://us.proxiware.com:2000",
}

# Form fields for the login call and for a later comment (the comment form is
# prepared here but not submitted in this snippet).
login_data = {
    'op': 'login-main',
    'user': 'UpbeatPark',
    'passwd': 'Testingreddit123',
    'api_type': 'json',
}
comment_data = {
    'thing_id': 't3_gluktj',
    'text': 'epical. redditor',
    'id': '#form-t3_gluktjbx2',
    'r': 'gaming',
    'renderstyle': 'html',
}

session = requests.Session()
session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4085.6 Safari/537.36'

# Visit the front page first, pause briefly, then submit the login form.
home_page = session.get('https://old.reddit.com/', proxies=proxies)
time.sleep(2)
login_response = session.post(
    'https://old.reddit.com/api/login/UpbeatPark',
    data=login_data,
    proxies=proxies,
)
print(login_response.text)
here's the output (I know for a fact it is the correct password):
{"json": {"errors": [["WRONG_PASSWORD", "wrong password", "passwd"]]}}
This worked for me:
import requests

# Login form fields; USER and PASS are placeholders for real credentials.
login_data = dict(
    op="login-main",
    user="USER",
    passwd="PASS",
    api_type="json",
)
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/82.0.4085.6 Safari/537.36"}

# Post the form directly through a fresh session -- no proxy involved.
session = requests.Session()
response = session.post(
    "https://old.reddit.com/api/login/USER",
    data=login_data,
    headers=headers,
)
print(response.text)
It seems exactly like the code you are using but without proxy. Can you try to turn it off? The proxy might block cookies.
I am trying to retrieve some HTML text behind a login page using a python requests POST, but my code fails to do so: the returned HTML contains "The page has expired due to inactivity."
Below is my code.
import requests

url_login = "https://savethewater-game.com/login"

# Browser-like headers sent with the login POST.
headers = {
    'referer': 'https://savethewater-game.com/login',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
}

# Login form fields; the _token value is hard-coded in this snippet.
payload = {
    '_token': 'mUaaXNup3vCEtiln5QeQNJNwoO8LrCH9opoVE4GH',
    'email': 'someone#gmail.com',  # fake email
    'password': '12345678',  # fake pass
}

with requests.Session() as session:
    # Submit the form in a single request and show the returned page.
    response = session.post(url_login, data=payload, headers=headers)
    print(response.text)
The intercepted login value in Chrome dev tool is shown below:
:authority: savethewater-game.com
:method: POST
:path: /login
:scheme: https
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
accept-encoding: gzip, deflate, br
accept-language: en,it;q=0.9,zh-CN;q=0.8,zh;q=0.7,zh-TW;q=0.6,en-US;q=0.5
cache-control: max-age=0
content-length: 93
content-type: application/x-www-form-urlencoded
cookie: XSRF-TOKEN=eyJpdiI6ImtvRW5UT1BMNjkxNVFBc1d2OVJKZ3c9PSIsInZhbHVlIjoicVVCYlhFRG50QmVKd3V1Yzh4NnNldUhvRXpZOWVSRDFiUGNsT1E4aG9oOUFpYlZ0M1BaRFwvR3VkK1Q4MkhLOFlBZDlxUWp4R0s4YjU4aTZGc0I0RVZ3PT0iLCJtYWMiOiIzYzMxMmI0ZjlhOTM0YzVjZjA5NDk2MDkxMDJlY2VlMjVmNjhiYTJiM2E2OTlkYmYzOTIyYzJiYTM0NTJhMWMyIn0%3D; savethewater_session=eyJpdiI6IjltY2M3alp2endPdWY4VmVpNGhKMXc9PSIsInZhbHVlIjoiVjR2T2lHempPVGM1YW04YldtbGkxcWU3TlwvU1N2RTRcL0VoMzFPY2RLb245bXo0bVJreDl0UnBMYlFjaDNOZlZlMEQ2YVpKVXU3QVYxWWRGNW13bE9wdz09IiwibWFjIjoiNjk0YTdmNTFmYzJiMzg2MDA3NmRiOGU5OTUwMWVkMDE3ZmRkZDY1NzUzMjVjMTYxNzljNjNlZTc4NzE5ODYyNiJ9
origin: https://savethewater-game.com
referer: https://savethewater-game.com/login
sec-fetch-mode: navigate
sec-fetch-site: same-origin
sec-fetch-user: ?1
upgrade-insecure-requests: 1
user-agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36
Some posts mentioned that the reason could be that the website blocks automated scraping. I would like to know whether my code is wrong or there is some other issue. Thanks very much!
It's hard to give a concrete solution without trial and error using those credentials. However, try the following. It should work.
import requests
from bs4 import BeautifulSoup

url_login = "https://savethewater-game.com/login"
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}
payload = {
    '_token': '',
    'email': 'someone#gmail.com',
    'password': '12345678'
}

with requests.Session() as session:
    # Send the same headers on every request of this session.
    session.headers.update(headers)

    # Load the login form. The session's cookie jar keeps every cookie the
    # server sets and replays them on the POST, so no "cookie" header is built
    # by hand. A hand-built header would carry only the first cookie and would
    # stop the jar's cookies from being sent.
    res = session.get(url_login)
    res.raise_for_status()

    # The form embeds a fresh token that must be posted back with the
    # credentials.
    soup = BeautifulSoup(res.text, "lxml")
    token_input = soup.select_one('input[name="_token"]')
    if token_input is None:
        raise RuntimeError('login page contains no input named "_token"')
    payload['_token'] = token_input['value']

    p = session.post(url_login, data=payload)
    print(p.content)
So I've been trying to figure out how to do the 'follow' action using Python code on imvu.com, but it always returns the message "invalid arguments" with the error "error in $: failed reading: not a valid json value".
import requests

# Target subscription collection and the profile to add to it.
url = "https://api.imvu.com/profile/profile-user-696969696/subscriptions"
data = {"id": "https://api.imvu.com/profile/profile-user-175389029"}

headers = {
    "Origin": "https://secure.imvu.com/",
    "Referer": "https://secure.imvu.com/next/av/Sammy165/",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
    "X-IMVU-SAUCE": "",  # removed sauce for account safety
}

# The dict is handed to `data=`, i.e. it goes out as a form-encoded body.
req = requests.post(url, data=data, headers=headers)
print(req.text)
Have you tried
requests.post(url=url, headers=headers, json=data)
?
You have to do json.dumps(data). See code below
import json

import requests

# Target subscription collection and the profile to add to it.
url = "https://api.imvu.com/profile/profile-user-696969696/subscriptions"
data = {"id": "https://api.imvu.com/profile/profile-user-175389029"}

headers = {
    "Origin": "https://secure.imvu.com/",
    "Referer": "https://secure.imvu.com/next/av/Sammy165/",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36",
    "X-IMVU-SAUCE": "",  # removed sauce for account safety
}

# Serialise the dict to a JSON string first so the request body is JSON text
# rather than form-encoded fields.
body = json.dumps(data)
req = requests.post(url, data=body, headers=headers)
print(req.text)
Output:
{"status":"failure","error":"ERROR-GENERIC-001","message":"Permission Denied: You are not allowed to modify this subscription set."}
For some reason python requests does not re-POST after encountering a redirect header:
import requests

# Local debugging proxy for plain-http traffic.
proxies = {'http': 'http://127.0.0.1:8888'}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36',
    'content-type': 'application/x-www-form-urlencoded',
}

# `url`, `data` and `timeout` are defined elsewhere; they are not part of
# this snippet.
r = requests.post(
    url,
    data=data,
    headers=headers,
    proxies=proxies,
    timeout=timeout,
    allow_redirects=True,
)
html = r.text
So it means I can't log in to any form that is behind a redirect. How can I solve this issue? Thank you!