What's the best way to specify a proxy with username and password for an http connection in python?
This works for me:
import urllib2
proxy = urllib2.ProxyHandler({'http': 'http://
username:password#proxyurl:proxyport'})
auth = urllib2.HTTPBasicAuthHandler()
opener = urllib2.build_opener(proxy, auth, urllib2.HTTPHandler)
urllib2.install_opener(opener)
conn = urllib2.urlopen('http://python.org')
return_str = conn.read()
Use this:
import requests
proxies = {"http":"http://username:password#proxy_ip:proxy_port"}
r = requests.get("http://www.example.com/", proxies=proxies)
print(r.content)
I think it's much simpler than using urllib. I don't understand why people love using urllib so much.
Setting an environment var named http_proxy like this: http://username:password#proxy_url:port
The best way of going through a proxy that requires authentication is using urllib2 to build a custom url opener, then using that to make all the requests you want to go through the proxy. Note in particular, you probably don't want to embed the proxy password in the url or the python source code (unless it's just a quick hack).
import urllib2
def get_proxy_opener(proxyurl, proxyuser, proxypass, proxyscheme="http"):
password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, proxyurl, proxyuser, proxypass)
proxy_handler = urllib2.ProxyHandler({proxyscheme: proxyurl})
proxy_auth_handler = urllib2.ProxyBasicAuthHandler(password_mgr)
return urllib2.build_opener(proxy_handler, proxy_auth_handler)
if __name__ == "__main__":
import sys
if len(sys.argv) > 4:
url_opener = get_proxy_opener(*sys.argv[1:4])
for url in sys.argv[4:]:
print url_opener.open(url).headers
else:
print "Usage:", sys.argv[0], "proxy user pass fetchurls..."
In a more complex program, you can seperate these components out as appropriate (for instance, only using one password manager for the lifetime of the application). The python documentation has more examples on how to do complex things with urllib2 that you might also find useful.
Or if you want to install it, so that it is always used with urllib2.urlopen (so you don't need to keep a reference to the opener around):
import urllib2
url = 'www.proxyurl.com'
username = 'user'
password = 'pass'
password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
# None, with the "WithDefaultRealm" password manager means
# that the user/pass will be used for any realm (where
# there isn't a more specific match).
password_mgr.add_password(None, url, username, password)
auth_handler = urllib2.HTTPBasicAuthHandler(password_mgr)
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)
print urllib2.urlopen("http://www.example.com/folder/page.html").read()
Here is the method use urllib
import urllib.request
# set up authentication info
authinfo = urllib.request.HTTPBasicAuthHandler()
proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
urllib.request.CacheFTPHandler)
# install it
urllib.request.install_opener(opener)
f = urllib.request.urlopen('http://www.python.org/')
"""
Related
I tried logging into quora using python. But it gives me the following error.
urllib2.HTTPError: HTTP Error 500: Internal Server Error
This is my code till now. I also work behind a proxy.
import urllib2
import urllib
import re
import cookielib
class Quora:
def __init__(self):
'''Initialising and authentication'''
auth = 'http://name:password#proxy:port'
cj = cookielib.CookieJar()
logindata = urllib.urlencode({'email' : 'email' , 'password' : 'password'})
handler = urllib2.ProxyHandler({'http' : auth})
opener = urllib2.build_opener(handler , urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
a = urllib2.urlopen('http://www.quora.com/' , logindata)
def main():
Quora()
Can someone please point out what is wrong?
if __name__ == '__main__':
main()
Try something like this:
# Load proxies
proxies = []
proxies_fp = open('proxies.txt', 'r') # A list of proxies
for proxy in proxies_fp:
proxies.append(proxy)
cookiejar = cookielib.CookieJar()
def perform_request(url, opener, credientials):
# Instantiate our request object
request = urllib2.Request(url)
# Perform the request, returning a pointer to the result set.
result = opener.urlopen(request, credentials)
return result
credentials ={
'username' : 'username',
'password' : 'password'
}
encoded_credentials = urllib.urlencode(credentials)
def main():
# Get random proxy
proxy = random.choice(proxies)
# Install our proxy
opener = urllib2.build_opener(
urllib2.ProxyHandler({'http': proxy}),
urllib2.HTTPRedirectHandler(),
urllib2.HTTPHandler(debuglevel=0),
urllib2.HTTPSHandler(debuglevel=0),
urllib2.HTTPCookieProcessor(cookiejar),
)
urllib2.install_opener(opener)
a = perform_request(url, opener, encoded_credentials)
--untested--
I've had to do something similar to this, and it worked for me this way. (Please note, that this is NOT an exact copy of code I used. I had to manipulate it a bit, and did NOT test)
I have this cURL call that works perfectly:
curl -H 'X-Requested-With: SO demo' -d 'parameter=value' https://username:password#api.domain.com/api/work/
My conversion does not work.
import urllib2
# Create a password manager.
password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
# Add the username and password.
top_level_url = 'https://api.server.com'
password_mgr.add_password(None, top_level_url, 'username', 'password')
handler = urllib2.HTTPBasicAuthHandler(password_mgr)
# Create "opener" (OpenerDirector instance).
opener = urllib2.build_opener(handler)
# Install the opener so all calls to urllib2.urlopen use our opener.
urllib2.install_opener(opener)
# Create request.
headers = {'X-Requested-With':'SO demo.'}
uri = 'https://api.domain.com/api/work/'
data='parameter=value'
req = urllib2.Request(uri,data,headers)
# Make request to fetch url.
result = urllib2.urlopen(req)
urllib2.HTTPError: HTTP Error 401: Unauthorized
Here's what I don't get. The same server has a separate API which similar code does work on, where the only thing that has changed is the parameter and uri. Note the cURL call works on both API calls.
Second API cURL call (that works):
curl -H 'X-Requested-With: SO demo' -d 'parameter=value' https://username:password#api.domain.com/api2/call.php
Equivalent code that works below:
import urllib2
# Create a password manager.
password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
# Add the username and password.
top_level_url = 'https://api.server.com'
password_mgr.add_password(None, top_level_url, 'username', 'password')
handler = urllib2.HTTPBasicAuthHandler(password_mgr)
# Create "opener" (OpenerDirector instance).
opener = urllib2.build_opener(handler)
# Install the opener.
# Now all calls to urllib2.urlopen use our opener.
urllib2.install_opener(opener)
# Create request.
headers = {'X-Requested-With':'SO demo.'}
uri = 'https://api.server.com/api2/call.php'
data='parameter=value'
req = urllib2.Request(uri,data,headers)
# Make request to fetch url.
result = urllib2.urlopen(req)
# Read results.
result.read()
Why does urllib2 work when the uri ends with a '.php', but not work when the uri ends with a '/'?
In the first request you are setting:
uri = 'https://api.domain.com/api/work/'
But if you were to do it the same as the second run, you probably meant to write it as:
uri = 'https://api.server.com/api/work/'
From Python urllib2 Basic Auth Problem
The problem [is] that the Python libraries, per HTTP-Standard, first send an unauthenticated request, and then only if it's answered with a 401 retry, are the correct credentials sent. If the ... servers don't do "totally standard authentication" then the libraries won't work.
This particular API does not respond with a 401 retry on the first attempt, it responds with an XML response containing the message that credentials were not sent.
I am fairly new to web programing but for the sake of it, I am trying to login to google account not using standard code but as a python application, but it is impossible to do so
has anyone tried to this before? can anyone help?
I made a python class that handle google login and the is able to get any google service page that requires the user to be logged in:
class SessionGoogle:
def __init__(self, url_login, url_auth, login, pwd):
self.ses = requests.session()
login_html = self.ses.get(url_login)
soup_login = BeautifulSoup(login_html.content).find('form').find_all('input')
my_dict = {}
for u in soup_login:
if u.has_attr('value'):
my_dict[u['name']] = u['value']
# override the inputs without login and pwd:
my_dict['Email'] = login
my_dict['Passwd'] = pwd
self.ses.post(url_auth, data=my_dict)
def get(self, URL):
return self.ses.get(URL).text
The idea is to go to the login page GALX hidden input value and send it back to google + login and password. It requires modules requests and beautifulSoup
Example of use:
url_login = "https://accounts.google.com/ServiceLogin"
url_auth = "https://accounts.google.com/ServiceLoginAuth"
session = SessionGoogle(url_login, url_auth, "myGoogleLogin", "myPassword")
print session.get("http://plus.google.com")
Hope this helps
Although probably not exactly what you were looking for here I found some code from a similar post that did run from me.
import urllib2
def get_unread_msgs(user, passwd):
auth_handler = urllib2.HTTPBasicAuthHandler()
auth_handler.add_password(
realm='New mail feed',
uri='https://mail.google.com',
user='%s#gmail.com' % user,
passwd=passwd
)
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)
feed = urllib2.urlopen('https://mail.google.com/mail/feed/atom')
return feed.read()
print get_unread_msgs("put-username-here","put-password-here")
reference:
How to auto log into gmail atom feed with Python?
2020 update for python 3:
import urllib.request
def unread_messages(user, passwd):
auth_handler = urllib.request.HTTPBasicAuthHandler()
auth_handler.add_password(
realm='New mail feed',
uri='https://mail.google.com',
user='%s#gmail.com' % user,
passwd=passwd
)
opener = urllib.request.build_opener(auth_handler)
urllib.request.install_opener(opener)
feed = urllib.request.urlopen('https://mail.google.com/mail/feed/atom')
return feed.read()
print(unread_messages('username', 'password'))
You can use urllib, urllib2 and cookielib libraries of python to login.
import urllib, urllib2, cookielib
def test_login():
username = '' # Gmail Address
password = '' # Gmail Password
cookie_jar = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
login_dict = urllib.urlencode({'username' : username, 'password' :password})
opener.open('https://accounts.google.com/ServiceLogin', login_dict)
response = opener.open('https://plus.google.com/explore')
print response.read()
if __name__ == '__main__':
test_login()
I have this program that check a website, and I want to know how can I check it via proxy in Python...
this is the code, just for example
while True:
try:
h = urllib.urlopen(website)
break
except:
print '['+time.strftime('%Y/%m/%d %H:%M:%S')+'] '+'ERROR. Trying again in a few seconds...'
time.sleep(5)
By default, urlopen uses the environment variable http_proxy to determine which HTTP proxy to use:
$ export http_proxy='http://myproxy.example.com:1234'
$ python myscript.py # Using http://myproxy.example.com:1234 as a proxy
If you instead want to specify a proxy inside your application, you can give a proxies argument to urlopen:
proxies = {'http': 'http://myproxy.example.com:1234'}
print("Using HTTP proxy %s" % proxies['http'])
urllib.urlopen("http://www.google.com", proxies=proxies)
Edit: If I understand your comments correctly, you want to try several proxies and print each proxy as you try it. How about something like this?
candidate_proxies = ['http://proxy1.example.com:1234',
'http://proxy2.example.com:1234',
'http://proxy3.example.com:1234']
for proxy in candidate_proxies:
print("Trying HTTP proxy %s" % proxy)
try:
result = urllib.urlopen("http://www.google.com", proxies={'http': proxy})
print("Got URL using proxy %s" % proxy)
break
except:
print("Trying next proxy in 5 seconds")
time.sleep(5)
Python 3 is slightly different here. It will try to auto detect proxy settings but if you need specific or manual proxy settings, think about this kind of code:
#!/usr/bin/env python3
import urllib.request
proxy_support = urllib.request.ProxyHandler({'http' : 'http://user:pass#server:port',
'https': 'https://...'})
opener = urllib.request.build_opener(proxy_support)
urllib.request.install_opener(opener)
with urllib.request.urlopen(url) as response:
# ... implement things such as 'html = response.read()'
Refer also to the relevant section in the Python 3 docs
Here example code guide how to use urllib to connect via proxy:
authinfo = urllib.request.HTTPBasicAuthHandler()
proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
urllib.request.CacheFTPHandler)
# install it
urllib.request.install_opener(opener)
f = urllib.request.urlopen('http://www.google.com/')
"""
For http and https use:
proxies = {'http':'http://proxy-source-ip:proxy-port',
'https':'https://proxy-source-ip:proxy-port'}
more proxies can be added similarly
proxies = {'http':'http://proxy1-source-ip:proxy-port',
'http':'http://proxy2-source-ip:proxy-port'
...
}
usage
filehandle = urllib.urlopen( external_url , proxies=proxies)
Don't use any proxies (in case of links within network)
filehandle = urllib.urlopen(external_url, proxies={})
Use proxies authentication via username and password
proxies = {'http':'http://username:password#proxy-source-ip:proxy-port',
'https':'https://username:password#proxy-source-ip:proxy-port'}
Note: avoid using special characters such as :,# in username and passwords
How do you open https url in Python?
import urllib2
url = "https://user:password#domain.com/path/
f = urllib2.urlopen(url)
print f.read()
gives:
httplib.InvalidURL: nonnumeric port: 'password#domain.com'
This has never failed me
import urllib2, base64
username = 'foo'
password = 'bar'
auth_encoded = base64.encodestring('%s:%s' % (username, password))[:-1]
req = urllib2.Request('https://somewebsite.com')
req.add_header('Authorization', 'Basic %s' % auth_encoded)
try:
response = urllib2.urlopen(req)
except urllib2.HTTPError, http_e:
# etc...
pass
Please read about the urllib2 password manager and the basic authentication handler as well as the digest authentication handler.
http://docs.python.org/library/urllib2.html#abstractbasicauthhandler-objects
http://docs.python.org/library/urllib2.html#httpdigestauthhandler-objects
Your urllib2 script must actually provide enough information to do HTTP authentication. Usernames, Passwords, Domains, etc.
If you want to pass username and password information to urllib2 you'll need to use an HTTPBasicAuthHandler.
Here's a tutorial showing you how to do it.
You cannot pass credentials to urllib2.open like that. In your case, user is interpreted as the domain name, while password#domain.com is interpreted as the port number.