Python requests behaviour

I want to check if a website exists. I use the requests module to make a GET request and check the status code after the request is made.
def check_website_exist(self, url):
    result = True
    request = requests.get("http://" + url)
    print(request.status_code)
    if request.status_code == 200:
        output.info("website found!")
        return result
    else:
        output.error("website not found!")
        result = False
        return result
When I make a request for the site 'www.isdfugpdohsiughsdopiughdsfoiguf.com' I get the status code 200, even though the site doesn't exist. Why do I get a 200 status code when the website doesn't exist?

You are not catching the possible errors. Here is an example of how to do it:
import requests
from requests.exceptions import ConnectionError

try:
    url = "http://www.isdfugpdohsiughsdopiughdsfoiguf.com"
    request_url = requests.get(url)
    print(request_url.status_code)
except ConnectionError:
    print("No exist")


How to get errors from python requests when host is unreachable?

I'm trying to make a small watchdog script for pinging a container running nginx.
This is my code:
import requests
import time

url = 'http://localhost:1234/'
response = ''

def checkloop(url, response):
    try:
        response = requests.get(url)
    except requests.ConnectionError:
        print("Can't connect to the site, sorry")
    else:
        response.status_code == 200
        print("OK", response.status_code)

while response == '':
    checkloop(url, response)
    time.sleep(5)
But I can't handle the error when the host is down; the script breaks, and I always get this error:
"requests.exceptions.ConnectionError: HTTPConnectionPool(host='localhost', port=1234): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x10332e4c0>: Failed to establish a new connection: [Errno 61] Connection refused'))"
How can I print 403, 404, or another error code when the host returns something other than 200? Is it possible?
Or maybe you can point me to an existing watchdog for Python, or a short guide on how to write one?
An HTTP 2xx/3xx/4xx/5xx answer from the server requires being able to make an HTTP connection to the server in the first place. If you can't connect at the network level at all (host unreachable), you can't connect at the HTTP level either (see the OSI model).
Instead of making your checkloop function update the global variable response, you could use your function to tell you whether the server is reachable. Example:
import requests
import time

url = 'http://localhost:1234/'

def can_connect_to(url) -> bool:  # `bool` is a type hint for the return type of the function
    try:
        response = requests.get(url)
    except requests.ConnectionError:
        print("Can't connect to the site, try again later")
        return False  # can NOT connect
    else:
        # do whatever you want
        if response.status_code == 200:
            print("OK 200")
            return True  # can connect
        else:
            print("strange ...", response.status_code)
            # return True or False ?

while not can_connect_to(url):
    time.sleep(5)
If you need the response content, you can rely on Python's truthy/falsy values for that, which looks like what you posted:
import requests
import time

url = 'http://localhost:1234/'

def fetch(url) -> str:  # we expect a string to be returned (any length)
    try:
        response = requests.get(url)
    except requests.ConnectionError:
        print("Can't connect to the site, try again later")
        return ""  # can NOT connect, we don't have any content for now
    if response.status_code == 200:
        print("OK 200")
        return response.text  # can connect
    else:
        print("strange ...", response.status_code)
        return response.text  # may be truthy or falsy ...

while not (response_text := fetch(url)):  # call fetch, store the result in `response_text`, and check whether it is truthy
    time.sleep(5)

# at this point, `not response_text` is False, which means `response_text` is truthy,
# which for a string means it is non-empty
print(response_text)

How to check if request post has been successfully posted?

I'm using the Python requests module to handle requests on a particular website I'm crawling. I'm fairly new to HTTP requests, but I do understand the basics. Here's the situation. There's a form I want to submit, and I do that by using the post method from the requests module:
# I create a session
Session = requests.Session()

# I get the page from which I'm going to post the url
Session.get(url)

# I create a dictionary containing all the data to post
PostData = {
    'param1': 'data1',
    'param2': 'data2',
}

# I post
Session.post(SubmitURL, data=PostData)
Is there any way to check if the data has been successfully posted?
Is checking .status_code a good way to do that?
If you capture the result of the post call, you can then check the status code:

result = Session.post(SubmitURL, data=PostData)

if result.status_code == requests.codes.ok:
    pass  # All went well...
I am a Python newbie but I think the easiest way is:

if response.ok:
    # whatever

because response.ok is True for any status code below 400 (all 2xx and 3xx responses), not just 200.
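As a quick illustration of that behaviour (a sketch only; httpbin.org is just a convenient test host, not part of the original question):

# Sketch: how response.ok treats different status codes
import requests

ok_resp = requests.get("https://httpbin.org/status/204")
err_resp = requests.get("https://httpbin.org/status/404")
print(ok_resp.ok)   # True  - 204 is below 400
print(err_resp.ok)  # False - 404 is 400 or above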
I am using this code:

import json
import requests

def post():
    return requests.post('http://httpbin.org/post', data={'x': 1, 'y': 2})

def test_post():
    txt = post().text
    txt = json.loads(txt)
    return txt.get("form") == {'y': '2', 'x': '1'}
For POST responses, the response status code is usually 201 (especially with REST APIs) but can sometimes be 200 as well, depending on how the API has been implemented.
There are a few different solutions:
Solution #1 (use the framework to raise an error) - Recommended:

from requests.exceptions import HTTPError

try:
    response = Session.post(SubmitURL, data=PostData)
    # No exception will be raised if the response is successful
    response.raise_for_status()
except HTTPError:
    pass  # Handle error
else:
    pass  # Success
Solution #2 (explicitly check for 200 and 201):

from http import HTTPStatus

response = Session.post(SubmitURL, data=PostData)

if response.status_code in [HTTPStatus.OK, HTTPStatus.CREATED]:
    pass  # Success
Solution #3 (all 2xx codes are considered successful):

response = Session.post(SubmitURL, data=PostData)

if response.status_code // 100 == 2:
    pass  # Success

python request error handling

I am writing a small Python app which uses requests to get and post data to an HTML page.
Now the problem I am having is that if I can't reach the HTML page, the code stops with a "max retries exceeded" error. I want to be able to do something if I can't reach the server.
Is such a thing possible?
Here is sample code:
import requests

url = "http://127.0.0.1/"
req = requests.get(url)

if req.status_code == 304:
    pass  # do something
elif req.status_code == 404:
    pass  # do something else
    # etc etc

# code here if the server can't be reached for whatever reason
You want to handle the exception requests.exceptions.ConnectionError, like so:
try:
    req = requests.get(url)
except requests.exceptions.ConnectionError as e:
    pass  # Do stuff here
You may want to set a suitable timeout when catching ConnectionError:
url = "http://www.stackoverflow.com"

try:
    req = requests.get(url, timeout=2)  # 2-second timeout
except requests.exceptions.ConnectionError as e:
    pass  # Couldn't connect
See this answer if you want to change the number of retries.
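The linked answer isn't reproduced here, but as a rough sketch, the retry behaviour can usually be tuned by mounting an HTTPAdapter with a urllib3 Retry policy onto a Session (the URL, timeout, and retry numbers below are only illustrative):

# Sketch: configure retries instead of accepting the default "max retries exceeded"
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
session.mount("http://", HTTPAdapter(max_retries=retries))
session.mount("https://", HTTPAdapter(max_retries=retries))

try:
    req = session.get("http://127.0.0.1/", timeout=2)
except requests.exceptions.ConnectionError:
    pass  # still unreachable after the configured retries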

Python script to see if a web page exists without downloading the whole page?

I'm trying to write a script to test for the existence of a web page; it would be nice if it could check without downloading the whole page.
This is my jumping-off point. I've seen multiple examples use httplib in the same way, however, every site I check simply returns False.
import httplib
from httplib import HTTP
from urlparse import urlparse

def checkUrl(url):
    p = urlparse(url)
    h = HTTP(p[1])
    h.putrequest('HEAD', p[2])
    h.endheaders()
    return h.getreply()[0] == httplib.OK

if __name__ == "__main__":
    print checkUrl("http://www.stackoverflow.com")  # True
    print checkUrl("http://stackoverflow.com/notarealpage.html")  # False
Any ideas?
Edit
Someone suggested this, but their post was deleted... does urllib2 avoid downloading the whole page?
import urllib2

try:
    urllib2.urlopen(some_url)
    return True
except urllib2.URLError:
    return False
How about this:
import httplib
from urlparse import urlparse

def checkUrl(url):
    p = urlparse(url)
    conn = httplib.HTTPConnection(p.netloc)
    conn.request('HEAD', p.path)
    resp = conn.getresponse()
    return resp.status < 400

if __name__ == '__main__':
    print checkUrl('http://www.stackoverflow.com')  # True
    print checkUrl('http://stackoverflow.com/notarealpage.html')  # False
This will send an HTTP HEAD request and return True if the response status code is < 400.
Notice that StackOverflow's root path returns a redirect (301), not a 200 OK.
Using requests, this is as simple as:
import requests
ret = requests.head('http://www.example.com')
print(ret.status_code)
This just loads the website's headers. To test whether this was successful, you can check the result's status_code, or use the raise_for_status method, which raises an exception if the response was an error (4xx or 5xx).
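For example, a small sketch of the raise_for_status route (example.com is only a placeholder host):

# Sketch: HEAD request plus raise_for_status to detect error responses
import requests

resp = requests.head("http://www.example.com", allow_redirects=True, timeout=5)
try:
    resp.raise_for_status()  # raises requests.HTTPError for 4xx/5xx responses
    print("Page exists:", resp.status_code)
except requests.HTTPError as err:
    print("Page returned an error:", err)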
How about this:
import requests

def url_check(url):
    # Description
    """Boolean return - check to see if the site exists.
    This function takes a URL as input and then requests the site
    head - not the full HTML - and then checks the response to see if
    it's less than 400. If it is less than 400 it will return True,
    else it will return False.
    """
    try:
        site_ping = requests.head(url)
        if site_ping.status_code < 400:
            # To view the return status code, type: print(site_ping.status_code)
            return True
        else:
            return False
    except Exception:
        return False
You can try
import urllib2

try:
    urllib2.urlopen(url='https://someURL')
except:
    print("page not found")

Checking if a website is up via Python

Using Python, how can I check if a website is up? From what I read, I need to make an "HTTP HEAD" request and check for status code "200 OK", but how do I do that?
Cheers
Related: How do you send a HEAD HTTP request in Python?
You could try to do this with getcode() from urllib
import urllib.request
print(urllib.request.urlopen("https://www.stackoverflow.com").getcode())
200
For Python 2, use
print urllib.urlopen("http://www.stackoverflow.com").getcode()
200
I think the easiest way to do it is by using the Requests module.

import requests

def url_ok(url):
    r = requests.head(url)
    return r.status_code == 200
You can use httplib
import httplib
conn = httplib.HTTPConnection("www.python.org")
conn.request("HEAD", "/")
r1 = conn.getresponse()
print r1.status, r1.reason
prints
200 OK
Of course, only if www.python.org is up.
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

req = Request("http://stackoverflow.com")

try:
    response = urlopen(req)
except HTTPError as e:
    print('The server couldn\'t fulfill the request.')
    print('Error code: ', e.code)
except URLError as e:
    print('We failed to reach a server.')
    print('Reason: ', e.reason)
else:
    print('Website is working fine')
Works on Python 3
import httplib
import socket
import re

def is_website_online(host):
    """ This function checks to see if a host name has a DNS entry by checking
        for socket info. If the website gets something in return,
        we know it's available to DNS.
    """
    try:
        socket.gethostbyname(host)
    except socket.gaierror:
        return False
    else:
        return True

def is_page_available(host, path="/"):
    """ This function retrieves the status code of a website by requesting
        HEAD data from the host. This means that it only requests the headers.
        If the host cannot be reached or something else goes wrong, it returns
        False.
    """
    try:
        conn = httplib.HTTPConnection(host)
        conn.request("HEAD", path)
        if re.match("^[23]\d\d$", str(conn.getresponse().status)):
            return True
    except StandardError:
        return False
    return False
The HTTPConnection object from the httplib module in the standard library will probably do the trick for you. BTW, if you start doing anything advanced with HTTP in Python, be sure to check out httplib2; it's a great library.
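If you are on Python 3, note that httplib was renamed to http.client; a minimal sketch of the same HEAD-request idea looks like this (www.python.org is just an example host):

# Sketch: the httplib approach on Python 3, where the module is http.client
from http.client import HTTPConnection

def head_status(host, path="/"):
    conn = HTTPConnection(host, timeout=5)
    conn.request("HEAD", path)
    status = conn.getresponse().status
    conn.close()
    return status

print(head_status("www.python.org"))  # e.g. 301 (redirect to HTTPS) or 200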
If the server is down, on Python 2.7 x86 Windows urllib has no timeout and the program deadlocks. So use urllib2:
import urllib2
import socket

def check_url(url, timeout=5):
    try:
        return urllib2.urlopen(url, timeout=timeout).getcode() == 200
    except urllib2.URLError as e:
        return False
    except socket.timeout as e:
        print False

print check_url("http://google.fr")    # True
print check_url("http://notexist.kc")  # False
I use requests for this; it is easy and clean.
Instead of the print function you can define and call a new function (notify via email, etc.). The try-except block is essential, because if the host is unreachable it will raise a lot of exceptions, so you need to catch them all.
import requests

URL = "https://api.github.com"

try:
    response = requests.head(URL)
except Exception as e:
    print(f"NOT OK: {str(e)}")
else:
    if response.status_code == 200:
        print("OK")
    else:
        print(f"NOT OK: HTTP response code {response.status_code}")
You may use the requests library to find out whether a website is up, i.e. returns status code 200:
import requests
url = "https://www.google.com"
page = requests.get(url)
print (page.status_code)
>> 200
In my opinion, caisah's answer misses an important part of your question, namely dealing with the server being offline.
Still, using requests is my favorite option, albeit as such:
import requests

try:
    requests.get(url)
except requests.exceptions.ConnectionError:
    print(f"URL {url} not reachable")
If by up you simply mean "the server is serving", then you could use cURL, and if you get a response then it's up.
I can't give you specific advice because I'm not a Python programmer, however here is a link to pycurl: http://pycurl.sourceforge.net/.
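For what it's worth, a minimal PycURL sketch of that idea might look like the following (example.com is only a placeholder, and it assumes pycurl is installed):

# Sketch: a cURL-style "is the server serving?" check via PycURL
import pycurl

c = pycurl.Curl()
c.setopt(pycurl.URL, "http://www.example.com")
c.setopt(pycurl.NOBODY, True)        # headers only, like an HTTP HEAD request
c.setopt(pycurl.CONNECTTIMEOUT, 5)
try:
    c.perform()
    print("up, status", c.getinfo(pycurl.RESPONSE_CODE))
except pycurl.error as err:
    print("down:", err)
finally:
    c.close()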
These functions can run an up/speed test for your web page:
from urllib.request import urlopen
from socket import socket
import time

def tcp_test(server_info):
    cpos = server_info.find(':')
    try:
        sock = socket()
        sock.connect((server_info[:cpos], int(server_info[cpos+1:])))
        sock.close()
        return True
    except Exception as e:
        return False

def http_test(server_info):
    try:
        # TODO : we can use this data later to find sub-url up or down results
        startTime = time.time()
        data = urlopen(server_info).read()
        endTime = time.time()
        speed = endTime - startTime
        return {'status': 'up', 'speed': str(speed)}
    except Exception as e:
        return {'status': 'down', 'speed': str(-1)}

def server_test(test_type, server_info):
    if test_type.lower() == 'tcp':
        return tcp_test(server_info)
    elif test_type.lower() == 'http':
        return http_test(server_info)
Requests and httplib2 are great options:
# Using requests.
import requests

request = requests.get(value)
if request.status_code == 200:
    return True
return False

# Using httplib2.
import httplib2

try:
    http = httplib2.Http()
    response = http.request(value, 'HEAD')
    if int(response[0]['status']) == 200:
        return True
except:
    pass
return False
If using Ansible, you can use the fetch_url function:
from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.urls import fetch_url

module = AnsibleModule(
    dict(),
    supports_check_mode=True)

try:
    response, info = fetch_url(module, url)
    if info['status'] == 200:
        return True
except Exception:
    pass
return False
My 2 cents:

import urllib.request

def getResponseCode(url):
    conn = urllib.request.urlopen(url)
    return conn.getcode()

if getResponseCode(url) != 200:
    print('Wrong URL')
else:
    print('Good URL')
Here's my solution using PycURL and validators
import pycurl
import validators

def url_exists(url):
    """
    Check if the given URL really exists
    :param url: str
    :return: bool
    """
    if validators.url(url):
        c = pycurl.Curl()
        c.setopt(pycurl.NOBODY, True)
        c.setopt(pycurl.FOLLOWLOCATION, False)
        c.setopt(pycurl.CONNECTTIMEOUT, 10)
        c.setopt(pycurl.TIMEOUT, 10)
        c.setopt(pycurl.COOKIEFILE, '')
        c.setopt(pycurl.URL, url)
        try:
            c.perform()
            response_code = c.getinfo(pycurl.RESPONSE_CODE)
            c.close()
            return True if response_code < 400 else False
        except pycurl.error as err:
            errno, errstr = err.args
            raise OSError('An error occurred: {}'.format(errstr))
    else:
        raise ValueError('"{}" is not a valid url'.format(url))
