How to use multiple proxies with the requests library in Python?

I have a list of proxies that I want the requests library to use. Because some of them don't work, I want to switch to the next proxy each time one doesn't respond.
I have this code (I was just trying whether it would work this way; it's not final):
import requests
import itertools

HTTP = [List of HTTP proxies...]
HTTPS = [list of https proxies...]

def try_proxies(http_proxies, https_proxies):
    for proxy_http_element in http_proxies:
        http_proxy = proxy_http_element
        yield http_proxy
    for proxy_https_element in https_proxies:
        https_proxy = proxy_https_element
        yield https_proxy

proxy_result = try_proxies(HTTP, HTTPS)
print(proxy_result)

# proxies = {
#     'http': http_proxy,
#     'https': https_proxy
# }
#
# try:
#     res = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=5)
#     print(res)
# except requests.exceptions.ConnectTimeout:
#     print("well, at least you tried")

def main():
    try_proxies(HTTP, HTTPS)

main()
but it doesn't work the way I want, and I know that's because the code isn't correct. What is the best way to do this?
Edit: I improved the code a bit but still didn't get the result I expected, because it doesn't test each HTTPS proxy, only the last value of https_proxy:
import requests

HTTP = [List of HTTP proxies...]
HTTPS = [List of HTTPS proxies...]

def try_proxies(http_proxies, https_proxies):
    for proxy_http_element in http_proxies:
        http_proxy = proxy_http_element
        for proxy_https_element in https_proxies:
            https_proxy = proxy_https_element
        proxies = {
            'http': http_proxy,
            'https': https_proxy,
        }
        try:
            res = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=20)
            print("here")
            print(res.status_code)
        except requests.exceptions.ConnectTimeout:
            print("well, at least you tried")

def main():
    try_proxies(HTTP, HTTPS)

main()

I solved the issue in this way:
import requests

HTTP = [List of http proxies...]
HTTPS = [List of https proxies]

def try_proxies(http_proxies, https_proxies):
    for proxy_http_element in http_proxies:
        http_proxy = proxy_http_element
        yield http_proxy
    for proxy_https_element in https_proxies:
        https_proxy = proxy_https_element
        yield https_proxy

test = [p for p in try_proxies(HTTP, HTTPS)]
for proxy in test:
    print(f"{proxy}")
    proxies = {
        'http': proxy
    }
    try:
        res = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)
        print("here")
        print(res.status_code)
    except requests.exceptions.ConnectTimeout as CT:
        print(f"well, at least you tried: {CT}")
I hope this can be helpful to someone else!
Edit: I removed the 'https': proxy entry because it was causing an error; once I removed it, the proxies worked!
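For anyone who just needs the first proxy that actually responds, rather than testing every one, a minimal sketch of the same idea could look like this (PROXIES and first_working_proxy are illustrative names, not part of the original code):

import requests

PROXIES = []  # fill with proxy URLs, e.g. "http://1.2.3.4:8080"

def first_working_proxy(candidates, test_url="https://httpbin.org/ip", timeout=10):
    """Return the first proxy that answers the test request, or None."""
    for proxy in candidates:
        try:
            res = requests.get(test_url, proxies={'http': proxy}, timeout=timeout)
            if res.ok:
                return proxy
        except requests.exceptions.RequestException:
            continue  # this proxy timed out or refused; try the next one
    return None

print(first_working_proxy(PROXIES))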

Related

Setting proxy to urllib.request (Python3)

How can I set a proxy for the latest urllib in Python 3?
I am doing the following:
from urllib import request as urlrequest

ask = urlrequest.Request(url)  # note that Request has a capital R here, unlike in previous versions
open = urlrequest.urlopen(req)
open.read()
I tried adding a proxy as follows:

ask = urlrequest.Request.set_proxy(ask, proxies, 'http')

However, I don't know how correct that is, since I am getting the following error:
    336     def set_proxy(self, host, type):
--> 337         if self.type == 'https' and not self._tunnel_host:
    338             self._tunnel_host = self.host
    339         else:

AttributeError: 'NoneType' object has no attribute 'type'
You should be calling set_proxy() on an instance of class Request, not on the class itself:
from urllib import request as urlrequest
proxy_host = 'localhost:1234' # host and port of your proxy
url = 'http://www.httpbin.org/ip'
req = urlrequest.Request(url)
req.set_proxy(proxy_host, 'http')
response = urlrequest.urlopen(req)
print(response.read().decode('utf8'))
I needed to disable the proxy in our company environment, because I wanted to access a server on localhost. I could not disable the proxy server with the approach from @mhawke (I tried passing {}, None and [] as proxies).
The following worked for me (it can also be used to set a specific proxy; see the comment in the code).
import urllib.request as request
# disable the proxy by passing an empty dictionary
proxy_handler = request.ProxyHandler({})
# alternatively, you could set a proxy for http with
# proxy_handler = request.ProxyHandler({'http': 'http://www.example.com:3128/'})
opener = request.build_opener(proxy_handler)
url = 'http://www.example.org'
# open the website with the opener
req = opener.open(url)
data = req.read().decode('utf8')
print(data)
urllib will automatically detect proxies set up in the environment, so you can just set the HTTP_PROXY variable, either in your shell, e.g. for Bash:
export HTTP_PROXY=http://proxy_url:proxy_port
or from Python, e.g.:
import os
os.environ['HTTP_PROXY'] = 'http://proxy_url:proxy_port'
Note from the urllib docs: "HTTP_PROXY [environment variable] will be ignored if a variable REQUEST_METHOD is set; see the documentation on getproxies()."
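As a quick sanity check (a small sketch, assuming proxies come from environment variables as above; proxy_url:proxy_port is a placeholder), you can ask urllib which proxies it has detected:

import os
import urllib.request

os.environ['HTTP_PROXY'] = 'http://proxy_url:proxy_port'
# getproxies() returns the proxies urllib found in the environment
print(urllib.request.getproxies())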
import urllib.request

def set_http_proxy(proxy):
    if proxy is None:  # Use system default setting
        proxy_support = urllib.request.ProxyHandler()
    elif proxy == '':  # Don't use any proxy
        proxy_support = urllib.request.ProxyHandler({})
    else:  # Use the given proxy
        proxy_support = urllib.request.ProxyHandler({'http': '%s' % proxy, 'https': '%s' % proxy})
    opener = urllib.request.build_opener(proxy_support)
    urllib.request.install_opener(opener)

proxy = 'user:pass@ip:port'
set_http_proxy(proxy)

url = 'https://www.httpbin.org/ip'
request = urllib.request.Request(url)
response = urllib.request.urlopen(request)
html = response.read()
html

Proxy settings in requests library Python

I have a Python script used to connect to Parse.com (remote server) and upload a file. The script runs off a server that sits behind a corporate firewall.
import env
import json
import requests
from requests.auth import HTTPProxyAuth

def uploadFile(fileFullPath):
    print "Attempting to upload file: " + fileFullPath
    proxies = {
        "http": "http://10.128.198.14",
        "https": "http://10.128.198.14"
    }
    auth = HTTPProxyAuth('MyDomain\\MyUsername', 'MyPassword')
    headers = {
        "X-Parse-Application-Id": env.X_Parse_APP_ID,
        "X-Parse-REST-API-Key": env.X_Parse_REST_API_Key,
        "Content-Type": "application/pdf"
    }
    f = open(fileFullPath, 'r')
    files = {'file': f}
    r = requests.post(env.PARSE_HOSTNAME + env.PARSE_FILES_ENDPOINT + "/" + env.PARSE_FILE_NAME,
                      files=files, headers=headers, timeout=10, verify=False, proxies=proxies)
    print r.text
When I used this module from the command prompt, I got the following message:
ConnectionError thrown. Details: Cannot connect to proxy. Socket error: Tunnel connection failed: 407 Proxy Authentication Required.
I am pretty sure the username and password are both correct.
Any solution? Thanks!
The reason for the 407 error is that the proxy itself needs to be authenticated. So for your proxies dict, do the following:
proxies = {
    "http": "http://user:pass@10.128.198.14",
    "https": "http://user:pass@10.128.198.14"
}
Fill in the user and pass placeholders in the proxy URLs. See the requests documentation on proxies for details on building authenticated proxy settings.
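Put together, a minimal sketch of the authenticated request might look like this (user, pass and the httpbin test URL are placeholders; this assumes the proxy accepts credentials embedded in the URL):

import requests

proxies = {
    "http": "http://user:pass@10.128.198.14",
    "https": "http://user:pass@10.128.198.14",
}

# the proxy credentials travel in the proxy URL itself
response = requests.get("https://httpbin.org/ip", proxies=proxies, timeout=10)
print(response.status_code, response.text)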

Requests - proxies dictionary

I'm a little confused about the requests module, especially proxies.
From the documentation:

proxies
Dictionary mapping protocol to the URL of the proxy (e.g. {'http': 'foo.bar:3128'}) to be used on each Request.
Can there be more than one proxy of the same type in the dictionary? I mean, is it possible to put a list of proxies there and have the requests module try them and use only the ones that are working?
Or can there be only one proxy address, for example for http?
Using the proxies parameter is limited by the very nature of a python dictionary (i.e. each key must be unique).
import requests

url = 'http://google.com'
proxies = {'https': '84.22.41.1:3128',
           'http': '185.26.183.14:80',
           'http': '178.33.230.114:3128'}

if __name__ == '__main__':
    print url
    print proxies
    response = requests.get(url, proxies=proxies)
    if response.status_code == 200:
        print response.text
    else:
        print 'Response ERROR', response.status_code
outputs
http://google.com
{'http': '178.33.230.114:3128', 'https': '84.22.41.1:3128'}
<!doctype html><html itemscope="" itemtype="http://schema.org/WebPage" lang="en"><head><meta content="Search the world's information, including webpages, images, videos and more. Google has many special features to help you find exactly what you're looking for."
...more html...
As you can see, the value of the 'http' key in the proxies dictionary is the last one encountered in the assignment (i.e. 178.33.230.114:3128). Try swapping the http entries around to confirm.
So, the answer is no, you cannot specify multiple proxies for the same protocol using a simple dictionary.
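You can see the key collapse without making any request at all; in a plain dictionary literal, a repeated key silently overwrites the earlier value:

proxies = {'https': '84.22.41.1:3128',
           'http': '185.26.183.14:80',
           'http': '178.33.230.114:3128'}
print(proxies)  # only the last 'http' value survives: '178.33.230.114:3128'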
I have tried using an iterable as a value, which would make sense to me
proxies = {'https': '84.22.41.1:3128',
           'http': ('178.33.230.114:3128', '185.26.183.14:80', )}
but with no luck, it produces an error
Well, actually you can, I've done this with a few lines of code and it works pretty well.
import requests


class Client:
    def __init__(self):
        self._session = requests.Session()
        self.proxies = None

    def set_proxy_pool(self, proxies, auth=None, https=True):
        """Randomly choose a proxy for every GET/POST request.

        :param proxies: list of proxies, like ["ip1:port1", "ip2:port2"]
        :param auth: if the proxy needs auth
        :param https: default is True, pass False if you don't need an https proxy
        """
        from random import choice
        if https:
            self.proxies = [{'http': p, 'https': p} for p in proxies]
        else:
            self.proxies = [{'http': p} for p in proxies]

        def get_with_random_proxy(url, **kwargs):
            proxy = choice(self.proxies)
            kwargs['proxies'] = proxy
            if auth:
                kwargs['auth'] = auth
            return self._session.original_get(url, **kwargs)

        def post_with_random_proxy(url, *args, **kwargs):
            proxy = choice(self.proxies)
            kwargs['proxies'] = proxy
            if auth:
                kwargs['auth'] = auth
            return self._session.original_post(url, *args, **kwargs)

        self._session.original_get = self._session.get
        self._session.get = get_with_random_proxy
        self._session.original_post = self._session.post
        self._session.post = post_with_random_proxy

    def remove_proxy_pool(self):
        self.proxies = None
        self._session.get = self._session.original_get
        self._session.post = self._session.original_post
        del self._session.original_get
        del self._session.original_post

    # You can define whatever operations you need using self._session
I use it like this:
client = Client()
client.set_proxy_pool(['112.25.41.136', '180.97.29.57'])
It's simple, but actually works for me.

Urllib2 Seems to Ignore Proxy Settings

I'm behind a proxy and would like to use urllib2 to access external sites. If I set up the proxy in my environment, I can access external sites. When I set a proxy in urllib2, it seems to be ignored and the access fails.
The code I'm using is:
import urllib2
import os
import sys

uri = "https://www.python.org"

http_proxy_server = "192.168.12.20"
http_proxy_port = "8082"
http_proxy = "http://%s:%s" % (http_proxy_server, http_proxy_port)

def open_url_no_proxy():
    sys.stdout.write('Proxy (none): ')
    proxy_handler = urllib2.ProxyHandler({})
    opener = urllib2.build_opener(proxy_handler)
    try:
        opener.open(uri)
        sys.stdout.write('PASS\n')
    except urllib2.URLError:
        sys.stdout.write('FAIL\n')

def open_url_system_proxy():
    sys.stdout.write('Proxy (system): ')
    opener = urllib2.build_opener()
    try:
        opener.open(uri)
        sys.stdout.write('PASS\n')
    except urllib2.URLError:
        sys.stdout.write('FAIL\n')

def open_url_installed_opener():
    sys.stdout.write('Proxy (installed): ')
    proxy_handler = urllib2.ProxyHandler({"http": http_proxy})
    opener = urllib2.build_opener(proxy_handler)
    try:
        opener.open(uri)
        sys.stdout.write('PASS\n')
    except urllib2.URLError:
        sys.stdout.write('FAIL\n')

if __name__ == "__main__":
    os.environ['no_proxy'] = 'localhost,127.0.0.1'
    os.environ['NO_PROXY'] = 'localhost,127.0.0.1'
    os.environ['http_proxy'] = http_proxy
    os.environ['HTTP_PROXY'] = http_proxy

    open_url_system_proxy()
    open_url_no_proxy()
    open_url_system_proxy()
    open_url_installed_opener()
    open_url_system_proxy()
The response I get on my system is:
$ python proxytest2.py
Proxy (system): PASS
Proxy (none): FAIL
Proxy (system): PASS
Proxy (installed): FAIL
Proxy (system): PASS
What am I doing wrong?
You've set up only a proxy for HTTP in the line below, but you're accessing an HTTPS site:
proxy_handler = urllib2.ProxyHandler({"http": http_proxy})
You need to modify this to
proxy_handler = urllib2.ProxyHandler({"http": http_proxy, "https": http_proxy})

How can I open a website with urllib via proxy in Python?

I have this program that checks a website, and I want to know how I can check it via a proxy in Python...
This is the code, just as an example:
import time
import urllib

while True:
    try:
        h = urllib.urlopen(website)
        break
    except:
        print '['+time.strftime('%Y/%m/%d %H:%M:%S')+'] '+'ERROR. Trying again in a few seconds...'
        time.sleep(5)
By default, urlopen uses the environment variable http_proxy to determine which HTTP proxy to use:
$ export http_proxy='http://myproxy.example.com:1234'
$ python myscript.py # Using http://myproxy.example.com:1234 as a proxy
If you instead want to specify a proxy inside your application, you can give a proxies argument to urlopen:
proxies = {'http': 'http://myproxy.example.com:1234'}
print("Using HTTP proxy %s" % proxies['http'])
urllib.urlopen("http://www.google.com", proxies=proxies)
Edit: If I understand your comments correctly, you want to try several proxies and print each proxy as you try it. How about something like this?
candidate_proxies = ['http://proxy1.example.com:1234',
                     'http://proxy2.example.com:1234',
                     'http://proxy3.example.com:1234']

for proxy in candidate_proxies:
    print("Trying HTTP proxy %s" % proxy)
    try:
        result = urllib.urlopen("http://www.google.com", proxies={'http': proxy})
        print("Got URL using proxy %s" % proxy)
        break
    except:
        print("Trying next proxy in 5 seconds")
        time.sleep(5)
Python 3 is slightly different here. It will try to auto-detect proxy settings, but if you need specific or manual proxy settings, consider this kind of code:
#!/usr/bin/env python3
import urllib.request

proxy_support = urllib.request.ProxyHandler({'http': 'http://user:pass@server:port',
                                             'https': 'https://...'})
opener = urllib.request.build_opener(proxy_support)
urllib.request.install_opener(opener)

with urllib.request.urlopen(url) as response:
    # ... implement things such as 'html = response.read()'
    pass
Refer also to the relevant section in the Python 3 docs
Here is example code showing how to use urllib to connect via a proxy:
import urllib.request

authinfo = urllib.request.HTTPBasicAuthHandler()
proxy_support = urllib.request.ProxyHandler({"http": "http://ahad-haam:3128"})

# build a new opener that adds authentication and caching FTP handlers
opener = urllib.request.build_opener(proxy_support, authinfo,
                                     urllib.request.CacheFTPHandler)
# install it
urllib.request.install_opener(opener)

f = urllib.request.urlopen('http://www.google.com/')
"""
For http and https use:
proxies = {'http':'http://proxy-source-ip:proxy-port',
'https':'https://proxy-source-ip:proxy-port'}
more proxies can be added similarly
proxies = {'http':'http://proxy1-source-ip:proxy-port',
'http':'http://proxy2-source-ip:proxy-port'
...
}
usage
filehandle = urllib.urlopen( external_url , proxies=proxies)
Don't use any proxies (in case of links within network)
filehandle = urllib.urlopen(external_url, proxies={})
Use proxies authentication via username and password
proxies = {'http':'http://username:password#proxy-source-ip:proxy-port',
'https':'https://username:password#proxy-source-ip:proxy-port'}
Note: avoid using special characters such as :,# in username and passwords
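If a username or password unavoidably contains such characters, one common workaround (a sketch, not part of the original answer) is to percent-encode the credentials with urllib.parse.quote before building the proxy URL:

from urllib.parse import quote

username = 'user@corp'   # hypothetical credentials containing special characters
password = 'p:ss@word'

# percent-encode so ':' and '@' in the credentials don't break URL parsing
proxy = 'http://%s:%s@proxy-source-ip:proxy-port' % (quote(username, safe=''),
                                                     quote(password, safe=''))
proxies = {'http': proxy, 'https': proxy}
print(proxies)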
