Proxy Check in python - python

I have written a script in python that uses cookies and POST/GET. I also included proxy support in my script. However, when one enters a dead proxy, the script crashes. Is there any way to check if a proxy is dead/alive before running the rest of my script?
Furthermore, I noticed that some proxies don't handle cookies/POST headers properly. Is there any way to fix this?

The simplest way is to simply catch the IOError exception from urllib:
try:
urllib.urlopen(
"http://example.com",
proxies={'http':'http://example.com:8080'}
)
except IOError:
print "Connection error! (Check proxy)"
else:
print "All was fine"
Also, from this blog post - "check status proxy address" (with some slight improvements):
for python 2
import urllib2
import socket
def is_bad_proxy(pip):
try:
proxy_handler = urllib2.ProxyHandler({'http': pip})
opener = urllib2.build_opener(proxy_handler)
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib2.install_opener(opener)
req=urllib2.Request('http://www.example.com') # change the URL to test here
sock=urllib2.urlopen(req)
except urllib2.HTTPError, e:
print 'Error code: ', e.code
return e.code
except Exception, detail:
print "ERROR:", detail
return True
return False
def main():
socket.setdefaulttimeout(120)
# two sample proxy IPs
proxyList = ['125.76.226.9:80', '213.55.87.162:6588']
for currentProxy in proxyList:
if is_bad_proxy(currentProxy):
print "Bad Proxy %s" % (currentProxy)
else:
print "%s is working" % (currentProxy)
if __name__ == '__main__':
main()
for python 3
import urllib.request
import socket
import urllib.error
def is_bad_proxy(pip):
    """Probe one proxy ('ip:port'); a truthy return value means it is bad."""
    try:
        handler = urllib.request.ProxyHandler({'http': pip})
        opener = urllib.request.build_opener(handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        # NOTE: install_opener changes the process-wide default opener.
        urllib.request.install_opener(opener)
        request = urllib.request.Request('http://www.example.com')  # change the URL to test here
        urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        print('Error code: ', e.code)
        return e.code
    except Exception as detail:
        print("ERROR:", detail)
        return True
    return False
def main():
    """Check each sample proxy and report whether it is usable."""
    socket.setdefaulttimeout(120)
    # two sample proxy IPs
    candidates = ['125.76.226.9:80', '25.176.126.9:80']
    for proxy in candidates:
        template = "Bad Proxy %s" if is_bad_proxy(proxy) else "%s is working"
        print(template % (proxy))

if __name__ == '__main__':
    main()
Remember this could double the time the script takes if the proxy is down (as you will have to wait for two connection timeouts). Unless you specifically have to know the proxy is at fault, handling the IOError is far cleaner, simpler and quicker.

you can use the Proxy-checker library which is as simple as this
# Requires the third-party "proxy_checker" package.
from proxy_checker import ProxyChecker

proxy_checker = ProxyChecker()
proxy_checker.check_proxy('<ip>:<port>')
output :
{
"country": "United States",
"country_code": "US",
"protocols": [
"socks4",
"socks5"
],
"anonymity": "Elite",
"timeout": 1649
}
with the possibility of generating your own proxies and check them with two lines of code

You can use an IP-getter website to get the IP from which your request is sent, then check whether that IP is the same as your proxy IP or something else. Here is a script for that purpose:
import requests

proxy_ip = "<IP>"
proxy_port = "<PORT>"
proxy_user = "<USERNAME>"
proxy_pass = "<PASSWORD>"

# Credentials are separated from the host with "@". The original used "#",
# which a URL parser treats as the start of a fragment, so the host:port part
# was silently ignored and the proxy was never used.
proxies = {
    "http": f"http://{proxy_user}:{proxy_pass}@{proxy_ip}:{proxy_port}/",
    "https": f"http://{proxy_user}:{proxy_pass}@{proxy_ip}:{proxy_port}/"
}

# api.ipify.org echoes the public IP the request came from; if the proxy is
# working, that IP must equal the proxy's IP.
url = 'https://api.ipify.org'
try:
    response = requests.get(url, proxies=proxies, timeout=10)
    if response.text != proxy_ip:
        print("Proxy does not work")
except requests.exceptions.RequestException:
    # Catch only request-related errors; a bare "except:" would also swallow
    # KeyboardInterrupt/SystemExit.
    print("Proxy does not work")

I think that the better approach is like dbr said, handling the exception.
Another solution that could be better in some cases, is to use an external online proxy checker tool to check if a proxy server is alive and then continue using your script without any modification.

There is one nice package Grab
So, if it ok for you, you can write something like this(simple valid proxy checker-generator):
from grab import Grab, GrabError
def get_valid_proxy(proxy_list):  # format of items e.g. '128.2.198.188:3124'
    """Generator: yield only those proxies through which google.com loads."""
    grabber = Grab()
    for candidate in proxy_list:
        grabber.setup(proxy=candidate, proxy_type='http', connect_timeout=5, timeout=5)
        try:
            grabber.go('google.com')
        except GrabError:
            #logging.info("Test error")
            pass
        else:
            yield candidate

Related

Creating an all in one requests function for shorter easier code in python3

I'm Trying to create an all in one function that handles all my API requests and cuts down on lots of repeated code especially with all of the error handling for different error codes.
I am using a few files different files to achieve this a.py that connects to "api a" and b.py that connect to "api b" and api.py that contains the function
a.py and b.py both start with
from api import *
and use
login_response = post_api_call(api_url_base + login_url, None , login_data).json()
or similar
api.py contains the below, but will be fleshed out with more error handling with retries etc which is what I don't want to be repeating.
import requests
import logging
def post_api_call(url, headers, data):
    """POST `data` to `url` and return the Response, or None if the request
    failed before a response was received.

    Centralises error handling (and, later, retries) so callers don't repeat it.
    """
    # Ensure a defined return value: without this, a ConnectionError/Timeout
    # left `response` unbound and the final `return` raised UnboundLocalError.
    response = None
    try:
        response = requests.post(url, headers=headers, data=data)
        response.raise_for_status()
    except requests.exceptions.HTTPError as errh:
        print ("Http Error:",errh)
        # logging must use lazy %s arguments: "..." + errh is str + exception
        # and raises TypeError inside the handler.
        logging.warning("Http Error: %s", errh)
    except requests.exceptions.ConnectionError as errc:
        print ("Error Connecting:",errc)
        logging.warning("Error Connecting: %s", errc)
    except requests.exceptions.Timeout as errt:
        print ("Timeout Error:",errt)
        logging.warning("Timeout Error: %s", errt)
    # only use above if want to retry certain errors, below should catch all of above if needed.
    except requests.exceptions.RequestException as err:
        print ("OOps: Something Else",err)
        logging.warning("OOps: Something Else %s", err)
    # retry certain errors...
    return response
The above works and isn't an issue.
The issue I'm having is I'm trying to not have different functions for post/get/push etc. how can I pass this through as a variable?
The other issue I am having is that some APIs need the data passed as "data=data" while others only work when I specify "json=data". Others need headers while some don't, but if I pass headers=None as a variable I get 405 errors. The only other way round it that I can think of is long nested if statements, which is nearly as bad as the repeated code.
Am I trying to over simplify this? Is there a better way?
The scripts have a number of API calls (minimum of 5) to a number of different APIs (currently 3 but expecting this to grow) it will then combine all the received data, compare it to the database and the run any updates against the necessary APIs.
Imports:
from requests import Request, Session
Method:
def api_request(*args, **kwargs):
    """Build and send an HTTP request, optionally reusing a caller's Session.

    Pass session=<requests.Session> to share cookies/connections across calls;
    every other positional/keyword argument is forwarded to requests.Request,
    so the method, URL, headers, and data=/json= are all caller-controlled.
    """
    # Extract our private keyword before Request sees it.
    if "session" in kwargs and isinstance(kwargs["session"], Session):
        local_session = kwargs.pop("session")
    else:
        local_session = Session()
    req = Request(*args, **kwargs)
    prepared_req = local_session.prepare_request(req)
    try:
        response = local_session.send(prepared_req)
    except Exception:
        # Placeholder for retry/error handling. Re-raise instead of the
        # original `pass`, which left `response` unbound and crashed the
        # final return with a confusing UnboundLocalError.
        raise
    return response
Usage:
sess = Session()
headers = {
"Accept-Language": "en-US",
"User-Agent": "test-app"
}
result = api_request("GET", "http://www.google.com", session=sess, headers=headers)
print(result.text)

Python 2.2.3 HTTP Basic Authentication Implementation

I am trying to implement the HTTP Basic Authentication in Python 2.2.3. This is code:
import urllib2
proxyUserName1='<proxyusername>'
proxyPassword1='<proxypassword>'
realmName1='<realm>'
proxyUri1='<uri>'
passman=urllib2.HTTPPasswordMgr()
passman.add_password(realm=realmName1, uri=proxyUri1, user=proxyUserName1, passwd=proxyPassword1)
auth_handler = urllib2.HTTPBasicAuthHandler(passman)
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)
# Setting up the request & request parameters
login_url_request = urllib2.Request('<URL To be Accessed>')
# Getting the Response & reading it.
try:
url_socket_connection = urllib2.urlopen(login_url_request)
except urllib2.URLError, urlerror:
print ("URL Error Occured:")
print (urlerror.code)
print (urlerror.headers)
except urllib2.HTTPError, httperror:
print ("HTTP Error Occured:")
print (httperror.code)
print (httperror.headers)
else:
login_api_response = str(url_socket_connection.read())
print (login_api_response)
I always get the URL Error 401. This code works perfectly in Python 3.4. Unfortunately I need to get this running in Python 2.2.3. Can someone please tell where am I going wrong ?
It worked after changing the code:
import urllib2
import base64
proxyUserName1='<proxyusername>'
proxyPassword1='<proxypassword>'
realmName1='<realm>'
proxyUri1='<uri>'
base64encodedstring = base64.encodestring('%s:%s' % (proxyUserName1, proxyPassword1)).replace('\n', '')
passman=urllib2.HTTPPasswordMgr()
passman.add_password(realm=realmName1, uri=proxyUri1, user=proxyUserName1, passwd=proxyPassword1)
auth_handler = urllib2.HTTPBasicAuthHandler(passman)
opener = urllib2.build_opener(auth_handler)
urllib2.install_opener(opener)
# Setting up the request & request parameters
login_url_request = urllib2.Request('<URL To be Accessed>')
login_url_request.add_header('Authorization', 'Basic %s' % base64encodedstring)
# Getting the Response & reading it.
try:
url_socket_connection = urllib2.urlopen(login_url_request)
except urllib2.URLError, urlerror:
print ("URL Error Occured:")
print (urlerror.code)
print (urlerror.headers)
except urllib2.HTTPError, httperror:
print ("HTTP Error Occured:")
print (httperror.code)
print (httperror.headers)
else:
login_api_response = str(url_socket_connection.read())
print (login_api_response)

Multithreading under url open process

I finished editing a script that check the url is requiring a WWW web basic authentication or not and printing the result for the user as in this script :
#!/usr/bin/python
# Importing libraries
from urllib2 import urlopen, HTTPError
import socket
import urllib2
import threading
import time
# Setting up variables
url = open("oo.txt",'r')
response = None
start = time.time()
# Excuting Coommands
start = time.time()
for line in url:
try:
response = urlopen(line, timeout=1)
except HTTPError as exc:
# A 401 unauthorized will raise an exception
response = exc
except socket.timeout:
print ("{0} | Request timed out !!".format(line))
except urllib2.URLError:
print ("{0} | Access error !!".format(line))
auth = response and response.info().getheader('WWW-Authenticate')
if auth and auth.lower().startswith('basic'):
print "requires basic authentication"
elif socket.timeout or urllib2.URLError:
print "Yay"
else:
print "Not requires basic authentication"
print "Elapsed Time: %s" % (time.time() - start)
I have a little things i need your help with the script to edit it here ..
I want the script to check every 10 URLs together and write the results for all the URLs at one time into a text file. I read about multithreading and multiprocessing, but I didn't find a match for my case that would simplify the code for me.
also i have a problem in the result when a timeout or a url error appears , the script give the result in 2 lines like that :
http://www.test.test
| Access error !!
I want it on one line — why does it show on two?
Any help in this issues ?
Thanks in advance
The package concurrent.futures provides functionality that makes it very easy to use concurrency in Python. You define a function check_url that should be called for each URL. Then you can use the map function to apply the function to each URL in parallel and iterate over the return values.
#! /usr/bin/env python3
import concurrent.futures
import urllib.error
import urllib.request
import socket
def load_urls(pathname):
    """Read one URL per line from *pathname*, stripping trailing newlines."""
    with open(pathname, 'r') as f:
        return [line.rstrip('\n') for line in f]
class BasicAuth(Exception):
    """Raised when a 401 response advertises HTTP Basic authentication."""
    pass


class CheckBasicAuthHandler(urllib.request.BaseHandler):
    """Translate a 401 response carrying a Basic challenge into BasicAuth."""

    def http_error_401(self, req, fp, code, msg, hdrs):
        challenge = hdrs.get('WWW-Authenticate', '')
        if challenge.lower().startswith('basic'):
            raise BasicAuth()
        # None tells urllib no handler dealt with the error.
        return None
def check_url(url):
    """Classify *url* by whether it demands HTTP Basic authentication."""
    try:
        opener = urllib.request.build_opener(CheckBasicAuthHandler())
        with opener.open(url, timeout=1) as u:
            return 'requires no authentication'
    except BasicAuth:
        return 'requires basic authentication'
    except socket.timeout:
        return 'request timed out'
    except urllib.error.URLError as e:
        return 'access error ({!r})'.format(e.reason)
if __name__ == '__main__':
    urls = load_urls('/tmp/urls.txt')
    # 10 worker threads check up to 10 URLs concurrently; map() yields
    # results in input order, so zip pairs each URL with its verdict.
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        verdicts = executor.map(check_url, urls)
        for url, result in zip(urls, verdicts):
            print('{}: {}'.format(url, result))

Get URL when handling urllib2.URLError

This pertains to urllib2 specifically, but custom exception handling more generally. How do I pass additional information to a calling function in another module via a raised exception? I'm assuming I would re-raise using a custom exception class, but I'm not sure of the technical details.
Rather than pollute the sample code with what I've tried and failed, I'll simply present it as a mostly blank slate. My end goal is for the last line in the sample to work.
#mymod.py
import urllib2

def openurl():
    # Any network failure propagates to the caller as urllib2.URLError.
    req = urllib2.Request("http://duznotexist.com/")
    response = urllib2.urlopen(req)
#main.py
import urllib2
import mymod
try:
mymod.openurl()
except urllib2.URLError as e:
#how do I do this?
print "Website (%s) could not be reached due to %s" % (e.url, e.reason)
You can add information to and then re-raise the exception.
#mymod.py
import urllib2

def openurl():
    url = "http://duznotexist.com/"
    req = urllib2.Request(url)
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError as e:
        # Attach the URL so the caller can report it; do NOT overwrite
        # e.reason -- it already holds the real cause of the failure
        # (the original clobbered it with a hard-coded, often wrong message).
        e.url = url
        # Bare `raise` re-raises the active exception and, in Python 2,
        # preserves the original traceback (`raise e` resets it).
        raise
#main.py
import urllib2
import mymod
try:
mymod.openurl()
except urllib2.URLError as e:
print "Website (%s) could not be reached due to %s" % (e.url, e.reason)
I don't think re-raising the exception is an appropriate way to solve this problem.
As #Jonathan Vanasco said,
if you're opening a.com , and it 301 redirects to b.com , urlopen will automatically follow that because an HTTPError with a redirect was raised. if b.com causes the URLError , the code above marks a.com as not existing
My solution is to overwrite redirect_request of urllib2.HTTPRedirectHandler
import urllib2

class NewHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that mutates the ORIGINAL Request object in place,
    so after urlopen() the caller's `req` reflects the final redirect target.
    """
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        m = req.get_method()
        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST"):
            newurl = newurl.replace(' ', '%20')
            # Drop body-describing headers: a redirected POST becomes a GET.
            newheaders = dict((k, v) for k, v in req.headers.items()
                              if k.lower() not in ("content-length", "content-type"))
            # reuse the req object
            # mind that req will be changed if redirection happends
            req.__init__(newurl,
                         headers=newheaders,
                         origin_req_host=req.get_origin_req_host(),
                         unverifiable=True)
            return req
        else:
            # Must be module-qualified: a bare `HTTPError` was a NameError
            # here (it is never imported into this namespace).
            raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
opener = urllib2.build_opener(NewHTTPRedirectHandler)
urllib2.install_opener(opener)
# mind that req will be changed if redirection happends
#req = urllib2.Request('http://127.0.0.1:5000')
req = urllib2.Request('http://www.google.com/')
try:
response = urllib2.urlopen(req)
except urllib2.URLError as e:
print 'error'
print req.get_full_url()
else:
print 'normal'
print response.geturl()
let's try to redirect the url to an unknown url:
import os
from flask import Flask, redirect

app = Flask(__name__)

# Restored decorator: the page rendered it as "#app.route('/')" (Markdown ate
# the "@"), which is a comment -- the view was never registered as a route.
@app.route('/')
def hello():
    # return 'hello world'
    return redirect("http://a.com", code=302)

if __name__ == '__main__':
    port = int(os.environ.get('PORT', 5000))
    app.run(host='0.0.0.0', port=port)
And the result is:
error
http://a.com/
normal
http://www.google.com/

Checking if a website is up via Python

By using python, how can I check if a website is up? From what I read, I need to check the "HTTP HEAD" and see status code "200 OK", but how to do so ?
Cheers
Related
How do you send a HEAD HTTP request in Python?
You could try to do this with getcode() from urllib
import urllib.request

# getcode() returns the HTTP status of the response (200 when the site is up).
print(urllib.request.urlopen("https://www.stackoverflow.com").getcode())
200
For Python 2, use
print urllib.urlopen("http://www.stackoverflow.com").getcode()
200
I think the easiest way to do it is by using Requests module.
import requests

def url_ok(url):
    """True iff a HEAD request to *url* is answered with HTTP 200."""
    return requests.head(url).status_code == 200
You can use httplib
import httplib
conn = httplib.HTTPConnection("www.python.org")
conn.request("HEAD", "/")
r1 = conn.getresponse()
print r1.status, r1.reason
prints
200 OK
Of course, only if www.python.org is up.
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

req = Request("http://stackoverflow.com")
try:
    response = urlopen(req)
except HTTPError as e:
    # HTTPError first: it subclasses URLError.
    print('The server couldn\'t fulfill the request.')
    print('Error code: ', e.code)
except URLError as e:
    print('We failed to reach a server.')
    print('Reason: ', e.reason)
else:
    print ('Website is working fine')
Works on Python 3
import httplib
import socket
import re
def is_website_online(host):
    """Return True when *host* resolves via DNS, False otherwise.

    Only checks name resolution -- it does not contact the web server itself.
    """
    try:
        socket.gethostbyname(host)
    except socket.gaierror:
        return False
    return True
def is_page_available(host, path="/"):
    """ This function retrieves the status code of a website by requesting
    HEAD data from the host. This means that it only requests the headers.
    If the host cannot be reached or something else goes wrong, it returns
    False.
    """
    try:
        conn = httplib.HTTPConnection(host)
        conn.request("HEAD", path)
        # Any 2xx or 3xx status counts as "available".
        if re.match("^[23]\d\d$", str(conn.getresponse().status)):
            return True
        return False  # 4xx/5xx: reachable but not available
    except StandardError:
        # Was `return None`, contradicting the docstring's promise of False.
        return False
The HTTPConnection object from the httplib module in the standard library will probably do the trick for you. BTW, if you start doing anything advanced with HTTP in Python, be sure to check out httplib2; it's a great library.
If server if down, on python 2.7 x86 windows urllib have no timeout and program go to dead lock. So use urllib2
import urllib2
import socket
def check_url( url, timeout=5 ):
try:
return urllib2.urlopen(url,timeout=timeout).getcode() == 200
except urllib2.URLError as e:
return False
except socket.timeout as e:
print False
print check_url("http://google.fr") #True
print check_url("http://notexist.kc") #False
I use requests for this, then it is easy and clean.
Instead of print function you can define and call new function (notify via email etc.). Try-except block is essential, because if host is unreachable then it will rise a lot of exceptions so you need to catch them all.
import requests

URL = "https://api.github.com"

try:
    response = requests.head(URL)
except Exception as e:
    # An unreachable host can raise many different exception types,
    # so a broad catch is deliberate here.
    print(f"NOT OK: {str(e)}")
else:
    if response.status_code == 200:
        print("OK")
    else:
        print(f"NOT OK: HTTP response code {response.status_code}")
You may use requests library to find if website is up i.e. status code as 200
import requests

# A status code of 200 means the site answered normally.
url = "https://www.google.com"
response = requests.get(url)
print (response.status_code)
>> 200
In my opinion, caisah's answer misses an important part of your question, namely dealing with the server being offline.
Still, using requests is my favorite option, albeit as such:
import requests

# `url` is expected to be defined by the surrounding code.
try:
    requests.get(url)
except requests.exceptions.ConnectionError:
    print(f"URL {url} not reachable")
If by up, you simply mean "the server is serving", then you could use cURL, and if you get a response than it's up.
I can't give you specific advice because I'm not a python programmer, however here is a link to pycurl http://pycurl.sourceforge.net/.
These functions can run speed and availability tests for your web page:
from urllib.request import urlopen
from socket import socket
import time
def tcp_test(server_info):
    """Return True if a TCP connection to 'host:port' can be opened.

    *server_info* is a string of the form 'host:port'.
    """
    host, _, port = server_info.partition(':')
    sock = socket()
    try:
        sock.connect((host, int(port)))
        return True
    except Exception as e:
        return False
    finally:
        # The original called `sock.close` WITHOUT parentheses, which is a
        # no-op attribute access -- the socket was never actually closed.
        sock.close()
def http_test(server_info):
    """Fetch *server_info* (a URL) and report status plus download time."""
    try:
        # TODO : we can use this data after to find sub urls up or down results
        begin = time.time()
        urlopen(server_info).read()
        elapsed = time.time() - begin
        return {'status' : 'up', 'speed' : str(elapsed)}
    except Exception:
        return {'status' : 'down', 'speed' : str(-1)}
def server_test(test_type, server_info):
    """Dispatch to tcp_test or http_test; unknown types return None."""
    kind = test_type.lower()
    if kind == 'tcp':
        return tcp_test(server_info)
    if kind == 'http':
        return http_test(server_info)
Requests and httplib2 are great options:
# NOTE(review): the bare `return` statements below assume this snippet is
# pasted inside an enclosing function; `value` is the URL to probe and must
# be defined by the caller.

# Using requests.
import requests
request = requests.get(value)
if request.status_code == 200:
return True
return False

# Using httplib2.
import httplib2
try:
# HEAD requests only the headers, not the body.
http = httplib2.Http()
response = http.request(value, 'HEAD')
# httplib2 returns (headers, content); the status lives in the headers dict.
if int(response[0]['status']) == 200:
return True
except:
pass
return False
If using Ansible, you can use the fetch_url function:
from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.urls import fetch_url

# NOTE(review): the bare `return` statements assume an enclosing function,
# and `url` must be defined by the caller.
module = AnsibleModule(
dict(),
supports_check_mode=True)
try:
# fetch_url returns (response, info); info['status'] carries the HTTP code.
response, info = fetch_url(module, url)
if info['status'] == 200:
return True
except Exception:
pass
return False
my 2 cents
def getResponseCode(url):
    """Return the HTTP status code for *url*, including error codes.

    urlopen raises HTTPError for 4xx/5xx answers, so without catching it the
    caller's `!= 200` comparison could never see a 404/500 -- the exception
    escaped first.
    """
    try:
        conn = urllib.request.urlopen(url)
        return conn.getcode()
    except urllib.error.HTTPError as e:
        return e.code
# `url` is expected to be defined by the surrounding code.
if getResponseCode(url) == 200:
    print('Good URL')
else:
    print('Wrong URL')
Here's my solution using PycURL and validators
import pycurl, validators
def url_exists(url):
    """
    Check if the given URL really exists
    :param url: str
    :return: bool
    """
    # Guard clause: reject malformed URLs before touching the network.
    if not validators.url(url):
        raise ValueError('"{}" is not a valid url'.format(url))
    c = pycurl.Curl()
    c.setopt(pycurl.NOBODY, True)          # HEAD-style request: no body
    c.setopt(pycurl.FOLLOWLOCATION, False)
    c.setopt(pycurl.CONNECTTIMEOUT, 10)
    c.setopt(pycurl.TIMEOUT, 10)
    c.setopt(pycurl.COOKIEFILE, '')
    c.setopt(pycurl.URL, url)
    try:
        c.perform()
        response_code = c.getinfo(pycurl.RESPONSE_CODE)
        return response_code < 400
    except pycurl.error as err:
        # pycurl.error carries (errno, message) in .args; unpacking the
        # exception object itself (`errno, errstr = err`) fails on Python 3
        # because exceptions are not iterable.
        errno, errstr = err.args
        raise OSError('An error occurred: {}'.format(errstr))
    finally:
        # Close the handle on every path; the original leaked it on errors.
        c.close()

Categories