Consecutive urllib2 POST gives 404 - python

The problem I am trying to solve with Python is making consecutive POST requests (completing an online form) against a website (specifically, a free online demo of an API at http://demo.travelportuniversalapi.com). So far I have not been able to retrieve the results page; I have been at this for two days now.
The code I employ is:
import sys
import urllib, urllib2, cookielib
from BeautifulSoup import BeautifulSoup
import re

class website:
    def __init__(self):
        self.host = 'demo.travelportuniversalapi.com'
        self.ua = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0'
        self.session = cookielib.CookieJar()  # session becomes an instance of the cookielib object
        pass

    def get(self):
        try:
            url = 'http://demo.travelportuniversalapi.com/(S(cexfuhghvlzyzx5n0ysesra1))/Search'  # this varies every 20 minutes
            data = None
            headers = {'User-Agent': self.ua}
            request = urllib2.Request(url, data, headers)
            self.session.add_cookie_header(request)
            response = urllib2.urlopen(request)
            self.session.extract_cookies(response, request)

            url = response.geturl()
            data = {'From': 'lhr', 'To': 'ams', 'Departure': '9/4/2013', 'Return': '9/6/2013'}
            headers = {'User-Agent': self.ua,
                       'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}
            request = urllib2.Request(url, urllib.urlencode(data), headers, 20)
            self.session.add_cookie_header(request)
            response = urllib2.urlopen(request, timeout=30)  # HTTP Error 404: Not Found - the error happens here
            self.session.extract_cookies(response, request)
        except urllib2.URLError as e:
            print >> sys.stderr, e
            return None

rt = website()
rt.get()
The error that I receive on the last urllib2 request is HTTP Error 404: Not Found. I am not sure my cookies are working.
Monitoring HTTP traffic with a browser add-on, I noticed that the browser sends an extra header with the POST: X-Requested-With: XMLHttpRequest. Is this relevant?
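For reference, here is a minimal sketch of the same two-step flow that lets urllib2 manage the cookies itself (via HTTPCookieProcessor) and adds the X-Requested-With header seen in the browser. The session URL and form fields are carried over from the code above and expire every 20 minutes, so treat this as an illustration rather than a verified fix:

# Sketch only: reuse the question's URL and form fields (both expire) and let
# urllib2 handle cookies automatically instead of calling add_cookie_header by hand.
import urllib, urllib2, cookielib

jar = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
ua = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0) Gecko/20100101 Firefox/23.0'
search_url = 'http://demo.travelportuniversalapi.com/(S(cexfuhghvlzyzx5n0ysesra1))/Search'

# The first GET stores the session cookies in the jar.
opener.open(urllib2.Request(search_url, None, {'User-Agent': ua}))

form = urllib.urlencode({'From': 'lhr', 'To': 'ams',
                         'Departure': '9/4/2013', 'Return': '9/6/2013'})
post = urllib2.Request(search_url, form, {
    'User-Agent': ua,
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',  # the header the browser sends with its AJAX POST
})
print opener.open(post, timeout=30).read()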

Related

Python post data params not working in the requests method of post

I've written some code to crawl a page by making POST requests, but it's still not working, and I think the problem is the data variable I send with the POST.
When I change the request values manually in HTTP Live Headers it works and I can grab the JSON data, but in Python 3 nothing works yet.
Please help, and thanks!
here is the code :
import requests

def print_hi(start):
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0'}
    url = "https://lite.ip2location.com/get-range.json?country=AE"
    cookies = {"PHPSESSID": "h90gasrdrruf0t4sqrvlr6onnh",
               "_gcl_au": "1.1.459429746.1630175151",
               "_ga_4JBHWLMXXQ": "GS1.1.1630175150.1.0.1630175152.0",
               "_ga": "GA1.2.1147785651.1630175151",
               "_gid": "GA1.2.1338887027.1630175152"
               }
    data = {'draw': 3,
            'columns[0][data]': 0, 'columns[0][name]': '', 'columns[0][searchable]': 'true',
            'columns[0][orderable]': 'false', 'columns[0][search][value]': '', 'columns[0][search][regex]': 'false',
            'columns[1][data]': 1, 'columns[1][name]': '', 'columns[1][searchable]': 'true',
            'columns[1][orderable]': 'false', 'columns[1][search][value]': '', 'columns[1][search][regex]': 'false',
            'columns[2][data]': 2, 'columns[2][name]': '', 'columns[2][searchable]': 'true',
            'columns[2][orderable]': 'false', 'columns[2][search][value]': '', 'columns[2][search][regex]': 'false',
            'order[0][column]': 0, 'order[0][dir]': 'asc',
            'start': int(start), 'length': 25,
            'search[value]': '', 'search[regex]': 'false'}
    req = requests.get(url, data=data, headers=headers, cookies=cookies)
    print(data)
    with open("ps-ip-ranges.txt", "a") as fi:
        fi.write(str(req.content) + "\n")
    print("[+]\t" + str(start) + "\t is saved.")

if __name__ == '__main__':
    for i in range(25, 250000, 25):
        print_hi(i)
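An untested sketch: the title talks about a POST, but the code above issues a GET, so the same payload could be sent as a form-encoded POST body with requests.post. The cookie value is the session-specific one from the question and the payload is trimmed down, so treat this as an illustration of the call shape only:

import requests

headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0'}
url = "https://lite.ip2location.com/get-range.json?country=AE"
cookies = {"PHPSESSID": "h90gasrdrruf0t4sqrvlr6onnh"}  # session-specific value taken from the question

# Trimmed DataTables-style payload; requests.post sends it in the request body
# as application/x-www-form-urlencoded, which is what the browser's AJAX call does.
data = {'draw': 3, 'start': 25, 'length': 25,
        'order[0][column]': 0, 'order[0][dir]': 'asc',
        'search[value]': '', 'search[regex]': 'false'}

resp = requests.post(url, data=data, headers=headers, cookies=cookies)
print(resp.status_code)
print(resp.text[:500])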

Amazon.com returns status 503

I am trying to fetch https://www.amazon.com content with the Python Requests library, but I immediately get a server error. Here is the code:
import requests
response = requests.get('https://www.amazon.com')
print(response)
And this code returns <Response [503]>. Can anyone tell me why this is happening and how to fix it?
Amazon requires that you specify a User-Agent HTTP header in order to return a 200 response:
import requests
headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/80.0'}
response = requests.get('https://www.amazon.com', headers=headers)
print(response)
Prints:
<Response [200]>
Try this,
import requests
headers = {'User-Agent': 'Mozilla 5.0'}
response = requests.get('https://www.amazon.com', headers=headers)
print(response)
You have not shown the code that extracts the information you want.
The code should look like this:
import requests
response = requests.get('https://www.amazon.com')
print(response.content)
You can also use json(), status_code, or text in place of content.
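For example, with the User-Agent header from the first answer in place, the different response accessors behave like this (just an illustration):

import requests

headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:80.0) Gecko/20100101 Firefox/80.0'}
response = requests.get('https://www.amazon.com', headers=headers)

print(response.status_code)    # integer HTTP status, e.g. 200 or 503
print(response.text[:200])     # decoded body as a string
print(response.content[:200])  # raw body as bytes
# response.json() parses the body as JSON; Amazon's homepage is HTML, so it would raise an error here.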

Requests login into website only getting 403 error

I am trying to log into www.ebay-kleinanzeigen.de using the requests library, but every time I try to post my data (on the register page it's the same as on the login page) I get a 403 error.
Here is the code for the register function:
import requests
from bs4 import BeautifulSoup

session = requests.Session()
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
headers = {'user-agent': user_agent, 'Referer': 'https://www.ebay-kleinanzeigen.de'}

with requests.Session() as c:
    url = 'https://www.ebay-kleinanzeigen.de/m-benutzer-anmeldung.html'
    c.headers = headers
    hp = c.get(url, headers=headers)
    soup = BeautifulSoup(hp.content, 'html.parser')
    crsf = soup.find('input', {'name': '_csrf'})['value']
    print(crsf)
    payload = dict(email='test.email#emailzz1.de', password='test123', passwordConfirmation='test123',
                   _marketingOptIn='on', _crsf=crsf)
    page = c.post(url, data=payload, headers=headers)
    print(page.text)
    print(page.url)
    print(page.status_code)
Is the problem that I need more headers? Aren't a user-agent and a referrer enough?
I have tried adding all the headers the browser sends, but then I get no response at all.
I have managed to create a script that successfully completes the register form you're trying to fill in, using the mechanicalsoup library. Note that you will have to check your email account manually for the message they send you to complete the registration.
I realise this doesn't actually answer the question of why your requests call returned a 403 Forbidden error, but it does complete your task without running into the same error.
import mechanicalsoup

browser = mechanicalsoup.StatefulBrowser()
browser.open("https://www.ebay-kleinanzeigen.de/m-benutzer-anmeldung.html")
browser.select_form('#registration-form')
browser.get_current_form().print_summary()
browser["email"] = "mailuser#emailprovider.com"
browser["password"] = "testSO12345"
browser["passwordConfirmation"] = "testSO12345"
response = browser.submit_selected()
rsp_code = response.status_code
#print(response.text)
print("Response code:", rsp_code)
if rsp_code == 200:
    print("Success! Opening a local debug copy of the page... (no CSS formatting)")
    browser.launch_browser()
else:
    print("Failure!")

Cookies and http requests

I have this URL, whose content is produced in the following way (PHP; it's supposed to generate a random cookie on every request):
setcookie('token', md5(time()), time()+99999);
if (isset($_COOKIE['token'])) {
    echo 'Cookie: ' . $_COOKIE['token'];
    die();
}
echo 'Cookie not set yet';
As you can see, the cookie changes on every reload/refresh of the page. Now I have a Python (Python 3) script with three requests that are completely independent of each other:
import requests

def get_req_data(req):
    print('\n\ntoken: ', req.cookies['token'])
    print('headers we sent: ', req.request.headers)
    print('headers server sent back: ', req.headers)

url = 'http://migueldvl.com/heya/login/tests2.php'
headers = {
    "User-agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:7.0.1) Gecko/20100101 Firefox/7.0.1',
    "Referer": 'https://www.google.com'
}

req1 = requests.get(url, headers=headers)
get_req_data(req1)
req2 = requests.get(url, headers=headers)
get_req_data(req2)
req3 = requests.get(url, headers=headers)
get_req_data(req3)
How can it be that we sometimes get the same cookie in different requests, when it is clearly programmed to change on every request?
If we:
import time
and add a
time.sleep(1) # wait one second before the next request
between requests, the cookie changes every time, which is the right and expected behaviour. But my question is: why do we need this time.sleep(1) to be certain the cookie changes? Wouldn't separate requests be enough?
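A note on why the sleep matters: PHP's time() has one-second resolution, so two requests handled within the same second hash the same timestamp and therefore get the same md5 token. A small stand-alone Python illustration of that effect (not part of the question's code):

import time
import hashlib

def token():
    # Mirror of the PHP md5(time()): hash the current Unix timestamp in whole seconds.
    return hashlib.md5(str(int(time.time())).encode()).hexdigest()

a = token()
b = token()            # almost certainly the same second, so the same token
time.sleep(1)
c = token()            # a later second, so a different token
print(a == b, a == c)  # typically prints: True False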

Why does this basic urllib code work, but this basic requests code not?

I'm trying to get a bot to log into a phpBB3 forum, which I can do with urllib. However, because it requires session IDs etc., it does not stay logged in when you change pages (I think this is the problem). So I'm trying to use requests, but I cannot even get requests to log in, even though it's easy to log in with urllib.
#!/usr/bin/env python3
import urllib.request
import urllib.parse
import http.cookiejar
from bs4 import BeautifulSoup

username = ''
password = ''

cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
opener.addheaders = [('User-agent', 'Mozilla/5.0')]

auth_url = "http://www.mingeford365.co.uk/forum/ucp.php?mode=login"
payload = {'username': username, 'password': password,
           "autologin": "on", 'login': 'Login'}
data = urllib.parse.urlencode(payload)
binary_data = data.encode('UTF-8')
req = urllib.request.Request(auth_url, binary_data)
resp = urllib.request.urlopen(req)
contents = resp.read().decode('UTF-8')
if username in contents:
    print('logged in.')
The above code works. The requests code below does not:
#!/usr/bin/env python3
import requests
from bs4 import BeautifulSoup

url = 'http://www.mingeford365.co.uk/forum/ucp.php?mode=login'
logininfo = {'username': '',
             'password': '',
             'autologin': "on",
             'login': 'Login'}
headers = {'User-Agent': 'Mozilla/5.0 (x11; Ubuntu; Linux x86; rv:28.0) Gecko/20100101 Firefox/28.0'}
           #'Accept': 'text/html, application/xhtml+xhtml,application/xml;q=0.9,*/*;q=0.8',
           #'Accept-Language': 'en-gb,en;q=0.5',
           #'Accept-Encoding': 'gzip, deflate',
           #'referer': 'http://www.mingeford365.co.uk/forum/index.php',
           #'Connection' : 'keep-alive',
           #'Content-Type': 'application/x-www-form-urlencoded'}

session = requests.Session()
get_session_id = session.get("http://www.mingeford365.co.uk/forum", headers=headers)
print(get_session_id.status_code)
response = session.post(url, params=logininfo, headers=headers)  # cookies=get_session_id.cookies
soup = BeautifulSoup(response.text)
print(soup.get_text())
You are putting your POST body parameters in your URL. Use data, not params:
response = session.post(url, data=logininfo, headers=headers)
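To make the difference concrete, here is a small illustration (example.com stands in for the real forum): params ends up in the query string, while data becomes the request body:

import requests

req = requests.Request('POST', 'http://example.com/ucp.php',
                       params={'mode': 'login'},
                       data={'username': 'u', 'password': 'p'}).prepare()
print(req.url)   # http://example.com/ucp.php?mode=login  <- params
print(req.body)  # username=u&password=p                  <- data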
