I have a piece of code which was written in Python 3.5 and uses urllib module. Now, I tried to convert this so that it will work with Python 2.7, but I get some errors from the urllib() module.
E.g:
Traceback (most recent call last):
File "alert.py", line 13, in <module>
import urllib.request as urllib
ImportError: No module named request
Now, I know that urllib is deprecated in Python 2.7 so I'm coming here to ask for some help with the lines that use urllib.
import urllib.request as urllib
from http.cookiejar import CookieJar
from os.path import isfile
from os.path import join as joinPath
from sys import exc_info
from traceback import print_tb
from urllib.parse import urlencode
# constant
APPLICATION_PATH = '/srv/path/'
ALERT_POINT_PATH = joinPath(APPLICATION_PATH, 'alert_contact')
URL_REQUEST_TIMEOUT = 42
SMS_BOX_URL = 'xx.xxxx.xxx.xxx'
def initWebConnection(): # init web connection
response = 0
initUrlLibResponse = initUrlLib() # init urllib
if initUrlLibResponse:
response = 1
return response
def initUrlLib(): # init urllib
response = 0
try:
cookieJar = CookieJar() # cookies
opener = urllib.build_opener(urllib.HTTPCookieProcessor(cookieJar))
urllib.install_opener(opener)
except Exception as e:
response = 1
# ex_type, ex, tb = exc_info()
return response
def urlRequest(url, data=None): # make url request
contentResponse = None
try:
request = None
if data:
dataRequest = urlencode(data)
dataRequest = dataRequest.encode('UTF-8')
request = urllib.Request(url, dataRequest)
else:
request = urllib.Request(url)
response = urllib.urlopen(url=request, timeout=URL_REQUEST_TIMEOUT) # make request
# get response
contentResponse = response.read()
except Exception as e:
contentResponse = None
# ex_type, ex, tb = exc_info()
return contentResponse
try:
evt.data = 'Some name'
# check production state
isInProduction = False
if evt.prodState == 1000:
isInProduction = True
if isInProduction:
initWebConnection()
# check alert point'
if isfile(ALERT_POINT_PATH):
alertContactContent = None
with open(ALERT_POINT_PATH, 'r') as alertContactFile:
alertContactContent = alertContactFile.read()
alertContactContent = alertContactContent.splitlines()
if alertContactContent:
evt.summary = '#[ DNS: ALERT ]# {}'.format(evt.summary)
for alertContactContentLine in alertContactContent:
webRequestData = dict(
## TO DO: set the url parameters appropriately
phone=alertContactContentLine,
message='NEW ALERT: {}'.format(evt.ipAddress),
)
webRequestResponse = urlRequest(SMS_BOX_URL, webRequestData)
else:
evt.summary = '#[ ERROR: SMS ALERT NO CONTACT ]# {}'.format(evt.summary)
except Exception as e:
ex_type, ex, tb = exc_info()
print('\n #[ERROR]#exception: {ex}\n'.format(ex=e))
print('\n #[ERROR]#exception traceback: {trace}\n'.format(trace=print_tb(tb)))
evt.summary = '#[ DNS:ERROR traceback in event message ]# {}'.format(evt.summary)
evt.message = '#[ DNS:ERROR ex_type:\n {} \nex: {} \n traceback:\n {} \n]# {}'.format(ex_type, ex,
print_tb(tb),
evt.message)
You can change the import lines from
import urllib.request as urllib
from http.cookiejar import CookieJar
from urllib.parse import urlencode
to
import urllib2 as urllib
from cookielib import CookieJar
from urllib import urlencode
for Python 2.7
Related
I have written below custom collector which pulls the data from a rest api and adds page view metrics. Each payload has 5 metrics so I am adding timestamp to it. It successfully publishes it to the http listener but the metrics never expires. How can I add the expiry to these metrics?
#!/usr/bin/env python3
import argparse
import re
import sys
import time
import datetime
import urllib3
import requests
import aniso8601
import pytz
import json
from prometheus_client import start_http_server
from prometheus_client.core import GaugeMetricFamily, REGISTRY
class HttpCollector(object):
def __init__(self):
self.url = "my_endpoint"
self.username = 'my_userid'
self.password = 'my_pass'
self.labels = ['app_name', 'url_host' ]
self.page_views_metrics = GaugeMetricFamily('pageviews', 'Page Views', labels=self.labels)
def collect(self):
headers = {'Accept': '*/*', }
auth = (self.username, self.password)
urllib3.disable_warnings()
result = requests.get(self.url, headers=headers, auth=auth, timeout=60, verify=False)
if not result.ok:
# Log error
print("No results")
return
json_result = result.json()
for record in json_result['records']:
timestamp_epoch = covert_date_to_epoch(record["timestamp'])
label_values = ["testapp", "testhost"]
self.page_views_metrics.add_metric(label_values, record["page_views"], timestamp=timestamp_epoch)
yield self.page_views_metrics
Making the self.page_views_metrics as local variable to collect method solved the problem.
import re
import sys
import time
import datetime
import urllib3
import requests
import aniso8601
import pytz
import json
from prometheus_client import start_http_server
from prometheus_client.core import GaugeMetricFamily, REGISTRY
class HttpCollector(object):
def __init__(self):
self.url = "my_endpoint"
self.username = 'my_userid'
self.password = 'my_pass'
self.labels = ['app_name', 'url_host' ]
def collect(self):
headers = {'Accept': '*/*', }
auth = (self.username, self.password)
urllib3.disable_warnings()
result = requests.get(self.url, headers=headers, auth=auth, timeout=60, verify=False)
if not result.ok:
# Log error
print("No results")
return
json_result = result.json()
page_views_metrics = GaugeMetricFamily('pageviews', 'Page Views', labels=self.labels)
for record in json_result['records']:
timestamp_epoch = covert_date_to_epoch(record["timestamp'])
label_values = ["testapp", "testhost"]
page_views_metrics.add_metric(label_values, record["page_views"], timestamp=timestamp_epoch)
yield page_views_metrics
I've been using a function that I took from the book Web Scraping with Python from O'Really by Ryan Mitchell:
import sys
import os.path
import socket
import random
import urllib2
import contextlib
import diskCache
import logging as logger
from bs4 import BeautifulSoup
DEFAULT_AGENT = 'Mozilla/5.0 Firefox/56.0'
DEFAULT_DELAY = 3
DEFAULT_RETRIES = 10
DEFAULT_TIMEOUT = 60
socket.setdefaulttimeout (DEFAULT_TIMEOUT)
def download (url, delay=DEFAULT_DELAY, user_agent=DEFAULT_AGENT, proxies=None, \
cache=None, num_retries=DEFAULT_RETRIES, timeout=DEFAULT_TIMEOUT, data=None):
result = None
if cache:
try:
result = cache[url]
except KeyError:
# url is not available in cache
pass
if result is not None and result['code'] is not None \
and num_retries > 0 and 500 <= result['code'] < 600:
# server error so ignore result from cache and re-download
result = None
if result is None:
proxy = random.choice(proxies) if proxies else None
headers = {'User-agent': user_agent}
result = call (url, headers, proxy=proxy, num_retries=num_retries, cache=cache)
if cache:
# save result to cache
cache[url] = result
return result['html']
def call (url, headers, proxy, num_retries, cache=None, data=None):
request = urllib2.Request(url, data, headers or {})
with contextlib.closing (urllib2.urlopen(request)) as connection:
try:
logger.info ('Downloading: %s', url)
html = connection.read ()
code = connection.getcode ()
except Exception as e:
logger.exception ('Download error:', str(e))
if cache:
del cache['url']
html = None
if hasattr (e, 'code'):
code = e.code
if num_retries > 0 and 500 <= code < 600:
return download (url, headers, num_retries-1, data) # retry server errors
else:
code = None
return {'html': html, 'code':code}
I wanted to know if there is a simpler way of handling the errors when downloading urls. I've seen that the requests library is a higher level and easier library and maybe it could simplify this. At the very least how would this code be for python3?
It would be something like
"""Functions used by the fetch module"""
# Standard library imports
import time
import socket
import logging as logger
from typing import Dict, Optional
# Third party imports
import requests
from requests.exceptions import HTTPError, Timeout
from bs4 import BeautifulSoup
# Constants
DEFAULT_AGENT = 'Mozilla/5.0 Firefox/56.0'
DEFAULT_DELAY = 3
DEFAULT_RETRIES = 10
DEFAULT_TIMEOUT = 60
socket.setdefaulttimeout(DEFAULT_TIMEOUT)
def fetch(url: str, retries: Optional[int] = DEFAULT_RETRIES) -> Dict:
"""Download an url"""
code = None
try:
logger.info('Downloading: %s', url)
resp = requests.get(url)
resp.raise_for_status()
code = resp.status_code
except (HTTPError, Timeout) as ex:
logger.exception("Couldn't download %s", ex)
return None
if code is not None and retries > 0 and \
500 <= code < 600: # Server error
logger.info('Retrying download')
time.sleep(DEFAULT_DELAY)
return fetch(url, retries-1)
return {'html': resp, 'code': code}
As you said this is a lot easier with requests
resp = requests.get(url, headers=headers, timeout=timeout)
print(resp.status_code)
print(resp.text)
# for an API use resp.json()
There is no exception raised by default. You can call resp.raise_for_status() if you do want to raise an exception.
See http://docs.python-requests.org/en/master/user/quickstart/ for details
I got error 'No module named httplib'. Then I replaced httplib to http.client. I used 2to3 and added b before secret_key.
import http.client
import urllib.request, urllib.parse, urllib.error
import json
import hashlib
import hmac
from collections import OrderedDict
import time
server = "api.---.net"
api_key = "---"
secret_key = b"---"
def get(url):
conn = http.client.HTTPSConnection(server)
conn.request("GET", url)
response = conn.getresponse()
data = json.load(response)
return data
def post(url, params):
conn = http.client.HTTPSConnection(server)
data = OrderedDict(sorted(params))
encoded_data = urllib.parse.urlencode(data)
sign = hmac.new(secret_key, msg=encoded_data, digestmod=hashlib.sha256).hexdigest().upper()
headers = {"Api-key": api_key, "Sign": sign, "Content-type": "application/x-www-form-urlencoded"}
conn.request("POST", url, encoded_data, headers)
def com():
conn = http.client.HTTPSConnection(server)
sign = hmac.new(secret_key, b'', digestmod=hashlib.sha256).hexdigest().upper()
headers = {"Api-key": api_key, "Sign": sign, "Content-type": "application/x-www-form-urlencoded"}
conn.request("GET", "/ex/com", None, headers)
Now I get error
'NoneType' object is not subscriptable
Traceback (most recent call last):
File "lc.py", line , in <module>
COM = float(com()['fee'])
TypeError: 'NoneType' object is not subscriptable
The function com() returns nothing (that is, a None). On return, you attempt to apply the selection operator to None (['fee']), which would work only if com() returned a dictionary.
Just a little question : it's possible to force a build in Buildbot via a python script or command line (and not via the web interface) ?
Thank you!
If you have a PBSource configured in your master.cfg, you can send a change from the command line:
buildbot sendchange --master {MASTERHOST}:{PORT} --auth {USER}:{PASS}
--who {USER} {FILENAMES..}
You can make a python script using the urlib2 or requests library to simulate a POST to the web UI
import urllib2
import urllib
import cookielib
import uuid
import unittest
import sys
from StringIO import StringIO
class ForceBuildApi():
MAX_RETRY = 3
def __init__(self, server):
self.server = server
cookiejar = cookielib.CookieJar()
self.urlOpener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
def login(self, user, passwd):
data = urllib.urlencode(dict(username=user,
passwd=passwd))
url = self.server + "login"
request = urllib2.Request(url, data)
res = self.urlOpener.open(request).read()
if res.find("The username or password you entered were not correct") > 0:
raise Exception("invalid password")
def force_build(self, builder, reason, **kw):
"""Create a buildbot build request
several attempts are created in case of errors
"""
reason = reason + " ID="+str(uuid.uuid1())
kw['reason'] = reason
data_str = urllib.urlencode(kw)
url = "%s/builders/%s/force" % (self.server, builder)
print url
request = urllib2.Request(url, data_str)
file_desc = None
for i in xrange(self.MAX_RETRY):
try:
file_desc = self.urlOpener.open(request)
break
except Exception as e:
print >>sys.stderr, "error when doing force build", e
if file_desc is None:
print >>sys.stderr, "too many errors, giving up"
return None
for line in file_desc:
if 'alert' in line:
print >>sys.stderr, "invalid arguments", url, data_str
return None
if 'Authorization Failed' in line:
print >>sys.stderr, "Authorization Failed"
return
return reason
class ForceBuildApiTest(unittest.TestCase):
def setUp(self):
from mock import Mock # pip install mock for test
self.api = ForceBuildApi("server/")
self.api.urlOpener = Mock()
urllib2.Request = Mock()
uuid.uuid1 = Mock()
uuid.uuid1.return_value = "myuuid"
sys.stderr = StringIO()
def test_login(self):
from mock import call
self.api.login("log", "pass")
self.assertEquals(len(self.api.urlOpener.open.call_args_list), 1)
req = urllib2.Request.call_args_list
self.assertEquals([call('server/login', 'passwd=pass&username=log')], req)
def test_force(self):
from mock import call
self.api.urlOpener.open.return_value = ["blabla"]
r = self.api.force_build("builder1", reason="reason", param1="foo", param2="bar")
self.assertEquals(len(self.api.urlOpener.open.call_args_list), 1)
req = urllib2.Request.call_args_list
self.assertEquals([call('server//builders/builder1/force', 'reason=reason+ID%3Dmyuuid¶m2=bar¶m1=foo')], req)
self.assertEquals(r, "reason ID=myuuid")
def test_force_fail1(self):
from mock import call
self.api.urlOpener.open.return_value = ["alert bla"]
r = self.api.force_build("builder1", reason="reason", param1="foo", param2="bar")
self.assertEquals(len(self.api.urlOpener.open.call_args_list), 1)
req = urllib2.Request.call_args_list
self.assertEquals([call('server//builders/builder1/force', 'reason=reason+ID%3Dmyuuid¶m2=bar¶m1=foo')], req)
self.assertEquals(sys.stderr.getvalue(), "invalid arguments server//builders/builder1/force reason=reason+ID%3Dmyuuid¶m2=bar¶m1=foo\n")
self.assertEquals(r, None)
def test_force_fail2(self):
from mock import call
def raise_exception(*a, **kw):
raise Exception("oups")
self.api.urlOpener.open = raise_exception
r = self.api.force_build("builder1", reason="reason", param1="foo", param2="bar")
req = urllib2.Request.call_args_list
self.assertEquals([call('server//builders/builder1/force', 'reason=reason+ID%3Dmyuuid¶m2=bar¶m1=foo')], req)
self.assertEquals(sys.stderr.getvalue(), "error when doing force build oups\n"*3 + "too many errors, giving up\n")
self.assertEquals(r, None)
def test_force_fail3(self):
from mock import call
self.api.urlOpener.open.return_value = ["bla", "blu", "Authorization Failed"]
r = self.api.force_build("builder1", reason="reason", param1="foo", param2="bar")
req = urllib2.Request.call_args_list
self.assertEquals([call('server//builders/builder1/force', 'reason=reason+ID%3Dmyuuid¶m2=bar¶m1=foo')], req)
self.assertEquals(sys.stderr.getvalue(), "Authorization Failed\n")
self.assertEquals(r, None)
if __name__ == '__main__':
unittest.main()
I used this script
from twisted.internet import reactor, threads
from urlparse import urlparse
import httplib
import itertools
concurrent = 200
finished=itertools.count(1)
reactor.suggestThreadPoolSize(concurrent)
def getStatus(ourl):
url = urlparse(ourl)
conn = httplib.HTTPConnection(url.netloc)
conn.request("HEAD", url.path)
res = conn.getresponse()
return res.status
def processResponse(response,url):
print response, url
processedOne()
def processError(error,url):
print "error", url#, error
processedOne()
def processedOne():
if finished.next()==added:
reactor.stop()
def addTask(url):
req = threads.deferToThread(getStatus, url)
req.addCallback(processResponse, url)
req.addErrback(processError, url)
added=0
for url in open('urllist.txt'):
added+=1
addTask(url.strip())
try:
reactor.run()
except KeyboardInterrupt:
reactor.stop()
when i try to run the script $ python test.py
it just print the url not do cUrl or send HTTP request ..
how could I send the HTTP or cURL process for each one
Thanks
This should work if if the format of your urls does not contain 'http://' However,
If they do contain 'http://' there is a solution for that in the comments
import httplib
def requester(url):
host = url.split('/')[0]
#if urls do contain 'http://' --> host = url.split('/')[2].replace('http://','')
req = url[url.find(host)+len(host):]
conn = httplib.HTTPConnection(host)
conn.request("HEAD","/"+req)
response = conn.getresponse()
print response.status, response.reason
#if you want data...
#data = response.read()
#print data
for url in open(urls.txt):
try:
requester(url)
except Error,e:
print Error, e
Furthermore, I reccomend checking out the httplib
Tested code, using inlineCallbacks and deferToThread. Also using defer.gatherResults to know when all the deferreds have been processed (instead of the counter method in the OP):
from twisted.internet import reactor, defer, utils
from twisted.internet.threads import deferToThread
from urlparse import urlparse
import httplib
threadDeferred = deferToThread.__get__
#threadDeferred
def get_url_head(url_arg):
url = urlparse(url_arg)
conn = httplib.HTTPConnection(url.netloc)
conn.request("HEAD", url.path)
res = conn.getresponse()
conn.close()
return res.status
#defer.inlineCallbacks
def check_url(sem,url_arg):
yield sem.acquire()
try:
result = yield get_url_head(url_arg)
defer.returnValue(result)
finally:
sem.release()
#defer.inlineCallbacks
def run(reactor,SEMAPHORE_SIZE=10):
sem = defer.DeferredSemaphore(SEMAPHORE_SIZE)
deferreds = []
failed_urls = []
responded_urls = []
with open('urllist.txt','r') as f:
for line in f:
url_arg = line.strip()
d = check_url(sem,url_arg)
d.addCallback(processResult,url_arg,responded_urls).addErrback(processErr,url_arg,failed_urls)
deferreds.append(d)
res = yield defer.gatherResults(deferreds)
# Do something else with failed_urls and responded_urls
reactor.callLater(0,reactor.stop)
def main():
from twisted.internet import reactor
reactor.callWhenRunning(run,reactor)
reactor.run()
def processResult(result,url_arg,responded_urls):
print "Reponse %s from %s" % (result,url_arg)
responded_urls.append((url_arg,result))
def processErr(err,url_arg,failed_urls):
print "Error checking %s: %s" % (url_arg,repr(err.value))
failed_urls.append((url_arg,err.value))
if __name__ == '__main__':
main()