I am learning python networking. I had learnt socket and now I want to learn python HTTP to connect to HTTPServer, extract cookies etc. I am facing this problem with cookie extraction. Tried google but didn't found a solution, here is the code:
import cookielib
import urllib
import urllib2
ID_USERNAME= 'id_username'
ID_PASSWORD = 'id_password'
USERNAME = 'you#email.com'
PASSWORD = 'mypassword'
LOGIN_URL = 'https://bitbucket.org/account/signin/?next=/'
NORMAL_URL = 'https://bitbucket.org/'
def extract_cookie_info():
cj=cookielib.CookieJar()
login_data= urllib.urlencode({ID_USERNAME : USERNAME,ID_PASSWORD:PASSWORD})
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
resp = opener.open(LOGIN_URL,login_data)
for cookie in cj:
print "First time cookie: %s ----> %s" %(cookie.name,cookie.value)
print "Headers: %s"%resp.headers
resp = opener.open(NORMAL_URL)
for cookie in cj:
print "Second time cookie: %s --> %s"%(cookie.name,cookie.value)
print "Headers : %s"%resp.headers
if __name__ == '__main__':
extract_cookie_info()
This is the error:
Traceback (most recent call last):
File "e.py",line 27,in <module>
extract_cookie_info()
File "e.py",line 16,in extract_cookie_info
resp=opener.open(LOGIN_URL,login_data)
File "C:\Python27\lib\urllib2.py",line 435, in open
response = meth(req,response)
File "C:\Python27\lib\urllib2.py", line 548, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python27\lib\urllib2.py", line 473, in error
return self._call_chain(*args)
File "C:\Python27\lib\urllib2.py", line 407, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 556, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 403: Forbidden
You are sending your login details as POST data rather than as part of the url.
>>> url = 'https://bitbucket.org/account/signin/'
>>> user = 'foo#example.com'
>>> pwd = 'secret'
>>> d = urlencode({'ID_USERNAME': user, 'ID_PASSWORD': pwd})
>>> cj = cookielib.CookieJar()
>>> opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
>>> resp = opener.open(url + '?' + d)
>>> res.getcode()
200
>>> for cookie in cj:print cookie.name
...
csrftoken
Related
I am trying to run a sample code where I retrieve a list of ontology names from a website and I get this error. I'm not really sure what is going on and what I should do to fix this issue. Any help would be greatly appreciated!
This is the code I am trying to run:
import urllib.request, urllib.error, urllib.parse
import json
import ssl
import requests
import os
from pprint import pprint
REST_URL = "http://data.bioontology.org"
API_KEY = ""
def get_json(url):
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ctx))
opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
return json.loads(opener.open(url).read())
# Get the available resources
resources = get_json(REST_URL + "/")
# Get the ontologies from the `ontologies` link
ontologies = get_json(resources["links"]["ontologies"])
# Get the name and ontology id from the returned list
ontology_output = []
for ontology in ontologies:
ontology_output.append(f"{ontology['name']}\n{ontology['#id']}\n")
# Print the first ontology in the list
pprint(ontologies[0])
# Print the names and ids
print("\n\n")
for ont in ontology_output:
print(ont)
This is the error message I am getting:
Traceback (most recent call last):
File "listOnt.py", line 23, in <module>
ontologies = get_json(resources["links"]["ontologies"])
File "listOnt.py", line 17, in get_json
return json.loads(opener.open(url).read())
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 531, in open
response = meth(req, response)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 640, in http_response
response = self.parent.error(
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 569, in error
return self._call_chain(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 502, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/urllib/request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 401: Unauthorized
I have used requests library and I know how to work with it, but I need to work with standard library only, so I would appreciate if you don't encourage me to use requests instead.
I made a flask server that handles POST requests and then from a different script I call urllib to make POST calls to the flask server. I need to send a raw json in body just like we do in Postman.
Flask Server
from flask import Flask, request
app = Flask(__name__)
#app.route('/random', methods=['POST'])
def random():
if request.method == 'POST':
if request.headers.get('Authorization') and request.headers.get('Content-Type') == 'application/json':
print(request.get_json())
return "Success"
else:
print(request.get_json())
return "Bad request"
app.run(host='0.0.0.0', port=5000, debug=True)
Urllib Client (saved as test.py) -
import urllib.request
import urllib.parse
d = {"spam": 1, "eggs": 2, "bacon": 0}
data = urllib.parse.urlencode(d)
data = data.encode()
req = urllib.request.Request("http://localhost:5000/random", data)
req.add_header('Content-Type', 'application/json')
req.add_header('Authorization', 12345)
with urllib.request.urlopen(req) as f:
print(f.read().decode('utf-8'))
With only Authorization header I get Bad Request as output and the json is None on the flask server side as expected.
With ONLY Content-Type header OR both the headers I get this error -
Traceback (most recent call last):
File "test.py", line 9, in <module>
with urllib.request.urlopen(req) as f:
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 400: BAD REQUEST
The whole thing is simple enough, but I am not able to understand why is this happening and the error message doesn't help much either.
The server is failing in request.get_json(). It's only happening when the client sends both headers because that's when it reaches this line.
To fix it, change the client to send the data as JSON:
import json # <-- Import json
import urllib.request
import urllib.parse
d = {"spam": 1, "eggs": 2, "bacon": 0}
data = json.dumps(d) # <-- Dump the dictionary as JSON
data = data.encode()
req = urllib.request.Request("http://localhost:5000/random", data)
req.add_header('Content-Type', 'application/json')
req.add_header('Authorization', 12345)
with urllib.request.urlopen(req) as f:
print(f.read().decode('utf-8'))
I hope this helps
I'm trying to make a request to the GitHub API with Python 3 urllib to create a release, but I made some mistake and it fails with an exception:
Traceback (most recent call last):
File "./a.py", line 27, in <module>
'Authorization': 'token ' + token,
File "/usr/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/usr/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/usr/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/usr/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 422: Unprocessable Entity
GitHub however is nice, and explains why it failed on the response body as shown at: 400 vs 422 response to POST of data
So, how do I read the response body? Is there a way to prevent the exception from being raised?
I've tried to catch the exception and explore it in ipdb, which gives an object of type urllib.error.HTTPError but I couldn't find that body data there, only headers.
The script:
#!/usr/bin/env python3
import json
import os
import sys
from urllib.parse import urlencode
from urllib.request import Request, urlopen
repo = sys.argv[1]
tag = sys.argv[2]
upload_file = sys.argv[3]
token = os.environ['GITHUB_TOKEN']
url_template = 'https://{}.github.com/repos/' + repo + '/releases'
# Create.
_json = json.loads(urlopen(Request(
url_template.format('api'),
json.dumps({
'tag_namezxcvxzcv': tag,
'name': tag,
'prerelease': True,
}).encode(),
headers={
'Accept': 'application/vnd.github.v3+json',
'Authorization': 'token ' + token,
},
)).read().decode())
# This is not the tag, but rather some database integer identifier.
release_id = _json['id']
usage: Can someone give a python requests example of uploading a release asset in github?
The HTTPError has a read() method that allows you to read the response body. So in your case, you should be able to do something such as:
try:
body = urlopen(Request(
url_template.format('api'),
json.dumps({
'tag_namezxcvxzcv': tag,
'name': tag,
'prerelease': True,
}).encode(),
headers={
'Accept': 'application/vnd.github.v3+json',
'Authorization': 'token ' + token,
},
)).read().decode()
except urllib.error.HTTPError as e:
body = e.read().decode() # Read the body of the error response
_json = json.loads(body)
The docs explain in more detail how the HTTPError instance can be used as a response, and some of its other attributes.
The following curl request works perfectly when I run it through Windows' CMD.
curl -XPOST -H"Content-Type:application/json" http://my_url:8082/druid/v2/sql/ -d "{\"query\":\"SELECT DISTINCT(event) FROM programs\"}"
I am trying to replicate the same call in python 3 using urllib.requests
import urllib.request
values = {'query':'SELECT DISTINCT(event) FROM programs'}
url = 'http://my_url:8082/druid/v2/sql'
data = urllib.parse.urlencode(values).encode("utf-8")
req = urllib.request.Request(url, data)
req.add_header("Content-Type","application/json")
response = urllib.request.urlopen(req)
the_page = response.read()
However the python version is returning a Server Error
response = urllib.request.urlopen(req)
File "C:\Python\Python35\lib\urllib\request.py", line 163, in urlopen
return opener.open(url, data, timeout)
File "C:\Python\Python35\lib\urllib\request.py", line 472, in open
response = meth(req, response)
File "C:\Python\Python35\lib\urllib\request.py", line 582, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python\Python35\lib\urllib\request.py", line 510, in error
return self._call_chain(*args)
File "C:\Python\Python35\lib\urllib\request.py", line 444, in _call_chain
result = func(*args)
File "C:\Python\Python35\lib\urllib\request.py", line 590, in
http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 500: Server Error
Can someone tell me what I'm doing wrong please?
You need to convert your data to json first, you are sending a dictionary.
import json
data = urllib.parse.urlencode(json.dumps(values)).encode("utf-8")
Or, use the requests library:
import requests
d = {'query':'SELECT DISTINCT(event) FROM programs'}
url = 'http://my_url:8082/druid/v2/sql'
r = requests.post(url, json=d)
r.raise_for_status()
print(r.text)
After some more experimentation, I managed to make the code work by replacing urllib.parse.urlencode with json.dumps. The code now reads as follows:
import urllib.request, json
values = {'query':'SELECT DISTINCT(event) FROM programs'}
url = 'http://my_url/druid/v2/sql'
data = json.dumps(values).encode("utf-8")
req = urllib.request.Request(url, data)
req.add_header("Content-Type","application/json")
response = urllib.request.urlopen(req)
the_page = response.read()
I want to send some HTTP requests to Twitter in Python in order to create a sign in for Twitter users for my app. I am using urllib, and following this link: https://dev.twitter.com/web/sign-in/implementing.
But I am unable to do this. I guess I need to authenticate before requesting a token but I don't know how to do that.
Code:
import urllib.request
req = urllib.request.Request("https://api.twitter.com/oauth/authenticate",
headers={'User-Agent': 'Mozilla/5.0'})
html = urllib.request.urlopen(req).read() //after this statement im
getting the error
Error:
Traceback (most recent call last):
File "<pyshell#5>", line 1, in <module>
html = urllib.request.urlopen(req).read()
File "C:\Python34\lib\urllib\request.py", line 161, in urlopen
return opener.open(url, data, timeout)
File "C:\Python34\lib\urllib\request.py", line 469, in open
response = meth(req, response)
File "C:\Python34\lib\urllib\request.py", line 579, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python34\lib\urllib\request.py", line 507, in error
return self._call_chain(*args)
File "C:\Python34\lib\urllib\request.py", line 441, in _call_chain
result = func(*args)
File "C:\Python34\lib\urllib\request.py", line 587, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
If you go to the url with a browser it shows you that you need a key:
Whoa there!
There is no request token for this page. That's the special key we need from applications asking to use your Twitter account. Please go back to the site or application that sent you here and try again; it was probably just a mistake.
If you go to this link it will let you choose one of your apps and
it will bring you to a signature-generator that will show you the request settings.
To get a request_token you can use requests_oauthlib:
import requests
from requests_oauthlib import OAuth1
REQUEST_TOKEN_URL = "https://api.twitter.com/oauth/request_token"
CONSUMER_KEY = "xxxxxxxx
CONSUMER_SECRET = "xxxxxxxxxxxxxxxxx"
oauth = OAuth1(CONSUMER_KEY, client_secret=CONSUMER_SECRET)
r = requests.post(url=REQUEST_TOKEN_URL, auth=oauth)
print(r.content)
oauth_token=xxxxxxxxxxxxxx&oauth_token_secret=xxxxxxxxxxx&oauth_callback_confirmed=true
You then need to extract the oauth_token oauth_token_secret:
from urlparse import parse_qs
import webbrowser
data = parse_qs(r.content)
oauth_token = data['oauth_token'][0]
oauth_token_secret = data['oauth_token_secret'][0]
AUTH = "https://api.twitter.com/oauth/authorize?oauth_token={}"
auth = AUTH.format(oauth_token)
webbrowser.open(auth)
A webpage will open asking to Authorize your_app to use your account?
For python 3 use:
from urllib.parse import parse_qs
data = parse_qs(r.text)
oauth_token = data['oauth_token'][0]
oauth_token_secret = data['oauth_token_secret'][0]