HTTP request connection timed out in Google Colab - Python

We are writing code for YOLO and uploading an integer to MCS Lite from Google Colab. Here is the code.
(MCS Lite GitHub: https://github.com/MCS-Lite)
import http.client
import urllib
import json
import time
import sys
import http
import socket
import urllib
print(http)
import http.client
if hasattr(http.client, 'HTTPSConnection'):
    print(type(http.client))

count_apple = 2
sys.path.insert(0, '/usr/lib/python2.7/bridge/')
deviceId = ""
deviceKey = ""

def post_to_mcslite(payload):
    headers = {"Content-type": "text/csv", "deviceKey": deviceKey}
    not_connected = 1
    while (not_connected):
        try:
            conn = http.client.HTTPConnection("#IP")
            not_connected = 0
        except (http.client.HTTPException, socket.error) as ex:
            print("Error: %s" % ex)
            time.sleep(10)
    conn.request("POST", "/api/devices/" + deviceId + "/datapoints.csv", payload, headers)
    response = conn.getresponse()
    print((response.status, response.reason, payload, time.strftime("%c")))
    data = response.read()
    conn.close()

while True:
    apple = str(count_apple)
    appleString = "apple,," + apple
    payload = appleString + "\n"
    post_to_mcslite(payload)
    time.sleep(10)
This code works fine and runs correctly in Visual Studio Code.
(The device ID, device key, and IP have all been filled in.)
But when it runs on Google Colab it goes wrong: it prints "connection timed out" and never finishes. What can I do?
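For reference, a minimal sketch of the same POST with an explicit timeout, so a blocked connection raises an error and retries instead of hanging forever. The host and device values below are placeholders, and note that a Colab VM can only reach hosts that are publicly routable, so a LAN IP will always time out:

import http.client
import socket
import time

def post_to_mcslite(payload, host="203.0.113.10", device_id="XXXX", device_key="XXXX"):
    # host and device values are placeholders; replace them with your own
    headers = {"Content-type": "text/csv", "deviceKey": device_key}
    while True:
        try:
            # timeout= makes the connection attempt fail fast instead of hanging
            conn = http.client.HTTPConnection(host, timeout=10)
            conn.request("POST", "/api/devices/" + device_id + "/datapoints.csv",
                         payload, headers)
            response = conn.getresponse()
            print(response.status, response.reason, payload, time.strftime("%c"))
            conn.close()
            return
        except (http.client.HTTPException, socket.error) as ex:
            # socket.timeout is a subclass of socket.error (OSError) in Python 3
            print("Error: %s" % ex)
            time.sleep(10)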

Related

How to create log file and save all the running logs from Python script

Below is part of my Python code, extraction.py, which has a lot of print statements, and there are other logs generated from a REST API. I want to create a log file with a date and time prefix attached to the file name, for example log_yyyy-MM-dd-hhminss, to capture all the logs from my Python code. This will help me track the daily logs.
import requests
import json
import shutil
import time
import gzip
import os

# extraction request:
if status_code == 202:
    requestUrl = r2.headers["location"]
    print('Extraction is not complete, we shall poll the location URL:')
    print(str(requestUrl))
    requestHeaders = {
        "Prefer": "respond-async",
        "Content-Type": "application/json",
        "Authorization": "token " + token
    }
    while (status_code == 202):
        print('As we received a 202, we wait 30 seconds, then poll again (until we receive a 200)')
        time.sleep(30)
        r3 = requests.get(requestUrl, headers=requestHeaders)
        status_code = r3.status_code
        print('HTTP status of the response: ' + str(status_code))
You could create a function that does that for you and replace the print calls with it.
Something along the lines of:
def log(msg):
    t = datetime.datetime.now()
    time = t.strftime("[%d.%m.%y] Time - %H_%M_%S")
    log_msg = str(msg)
    # or any other format (could even use time.ctime() or similar)
    print(log_msg)
    with open("path/" + time + ".log", 'a+') as file:
        file.write(log_msg + "\n")
So your code would look something like this:
import requests
import json
import shutil
import time
import gzip
import os
import datetime

def log(msg):
    t = datetime.datetime.now()
    time = t.strftime("[%d.%m.%y] Time - %H_%M_%S")
    log_msg = str(msg)
    print(log_msg)
    with open("path/" + time + ".log", 'a+') as file:
        file.write(log_msg + "\n")

# extraction request:
if status_code == 202:
    requestUrl = r2.headers["location"]
    log('Extraction is not complete, we shall poll the location URL:')
    log(str(requestUrl))
    requestHeaders = {
        "Prefer": "respond-async",
        "Content-Type": "application/json",
        "Authorization": "token " + token
    }
    while (status_code == 202):
        log('As we received a 202, we wait 30 seconds, then poll again (until we receive a 200)')
        time.sleep(30)
        r3 = requests.get(requestUrl, headers=requestHeaders)
        status_code = r3.status_code
        log('HTTP status of the response: ' + str(status_code))
Also, as buran pointed out, one should use the standard logging module, which handles writing to a file for you.
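For example, a minimal sketch of that approach with the logging module; the filename pattern below is an assumption based on the log_yyyy-MM-dd-hhminss format asked for in the question:

import logging
import datetime

# Build a file name like log_2024-01-31-093015.log (assumed naming pattern)
logfile = datetime.datetime.now().strftime("log_%Y-%m-%d-%H%M%S.log")
logging.basicConfig(
    filename=logfile,
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)

# Use logging.info(...) wherever you currently call print(...) or log(...)
logging.info('Extraction is not complete, we shall poll the location URL:')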

Download and handle errors

I've been using a function that I took from the book Web Scraping with Python by Ryan Mitchell (O'Reilly):
import sys
import os.path
import socket
import random
import urllib2
import contextlib
import diskCache
import logging as logger
from bs4 import BeautifulSoup

DEFAULT_AGENT = 'Mozilla/5.0 Firefox/56.0'
DEFAULT_DELAY = 3
DEFAULT_RETRIES = 10
DEFAULT_TIMEOUT = 60

socket.setdefaulttimeout(DEFAULT_TIMEOUT)

def download(url, delay=DEFAULT_DELAY, user_agent=DEFAULT_AGENT, proxies=None,
             cache=None, num_retries=DEFAULT_RETRIES, timeout=DEFAULT_TIMEOUT, data=None):
    result = None
    if cache:
        try:
            result = cache[url]
        except KeyError:
            # url is not available in cache
            pass
    if result is not None and result['code'] is not None \
            and num_retries > 0 and 500 <= result['code'] < 600:
        # server error so ignore result from cache and re-download
        result = None
    if result is None:
        proxy = random.choice(proxies) if proxies else None
        headers = {'User-agent': user_agent}
        result = call(url, headers, proxy=proxy, num_retries=num_retries, cache=cache)
        if cache:
            # save result to cache
            cache[url] = result
    return result['html']

def call(url, headers, proxy, num_retries, cache=None, data=None):
    request = urllib2.Request(url, data, headers or {})
    with contextlib.closing(urllib2.urlopen(request)) as connection:
        try:
            logger.info('Downloading: %s', url)
            html = connection.read()
            code = connection.getcode()
        except Exception as e:
            logger.exception('Download error:', str(e))
            if cache:
                del cache['url']
            html = None
            if hasattr(e, 'code'):
                code = e.code
                if num_retries > 0 and 500 <= code < 600:
                    return download(url, headers, num_retries-1, data)  # retry server errors
            else:
                code = None
    return {'html': html, 'code': code}
I wanted to know if there is a simpler way of handling the errors when downloading URLs. I've seen that the requests library is a higher-level, easier library, and maybe it could simplify this. At the very least, how would this code look in Python 3?
It would be something like
"""Functions used by the fetch module"""
# Standard library imports
import time
import socket
import logging as logger
from typing import Dict, Optional
# Third party imports
import requests
from requests.exceptions import HTTPError, Timeout
from bs4 import BeautifulSoup
# Constants
DEFAULT_AGENT = 'Mozilla/5.0 Firefox/56.0'
DEFAULT_DELAY = 3
DEFAULT_RETRIES = 10
DEFAULT_TIMEOUT = 60
socket.setdefaulttimeout(DEFAULT_TIMEOUT)
def fetch(url: str, retries: Optional[int] = DEFAULT_RETRIES) -> Dict:
"""Download an url"""
code = None
try:
logger.info('Downloading: %s', url)
resp = requests.get(url)
resp.raise_for_status()
code = resp.status_code
except (HTTPError, Timeout) as ex:
logger.exception("Couldn't download %s", ex)
return None
if code is not None and retries > 0 and \
500 <= code < 600: # Server error
logger.info('Retrying download')
time.sleep(DEFAULT_DELAY)
return fetch(url, retries-1)
return {'html': resp, 'code': code}
As you said, this is a lot easier with requests:
resp = requests.get(url, headers=headers, timeout=timeout)
print(resp.status_code)
print(resp.text)
# for an API use resp.json()
There is no exception raised by default. You can call resp.raise_for_status() if you do want to raise an exception.
See http://docs.python-requests.org/en/master/user/quickstart/ for details
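Putting those two points together, a minimal sketch combining raise_for_status() with an explicit timeout (the URL and timeout value are placeholders):

import requests
from requests.exceptions import HTTPError, Timeout

try:
    # timeout= keeps a hung server from blocking forever
    resp = requests.get("https://example.com/api", timeout=60)
    resp.raise_for_status()  # raises HTTPError for 4xx/5xx responses
except (HTTPError, Timeout) as ex:
    print("Request failed:", ex)
else:
    print(resp.status_code)
    # for an API use resp.json()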

Storing JSON data in Aerospike in Python

I'm trying to retrieve the response from ip-api.com for most IP ranges, and I want to store that data in Aerospike, but I'm having some errors.
Here is the Python script
# import the module
from __future__ import print_function
import aerospike
import urllib2

config = {
    'hosts': [ ('127.0.0.1', 3000) ]
}

try:
    client = aerospike.client(config).connect()
except:
    import sys
    print("failed to connect to the cluster with", config['hosts'])
    sys.exit(1)

key = ('ip', 'hit', 'trial')

try:
    for i in range(0, 255):
        for j in range(0, 255):
            for k in range(0, 255):
                for l in range(0, 255):
                    if not((i == 198 and j == 168) or (i == 172 and j > 15 and j < 32) or (i == 10)):
                        response = urllib2.urlopen('http://ip-api.com/json/' + str(i) + '.' + str(j) + '.' + str(k) + '.' + str(l))
                        html = response.read()
                        client.put(key, html)
except Exception as e:
    import sys
    print("error: {0}".format(e), file=sys.stderr)

client.close()
I'm new to Python as well as Aerospike, infact any no-SQL databases. Any help would be appreciated.
All the code is right from the Aerospike perspective, except you would want to change
html = response.read()
client.put(key, html)
to
import json
client.put(key, json.load(response))
The response is a JSON string, which needs to be converted to a dict (a JSON object) before it is stored.
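As a minimal sketch of the corrected call, assuming the same config and key as in the question (the IP address queried is just an example): json.load() turns the file-like response into a dict, which is the bin-name-to-value mapping that client.put() expects.

from __future__ import print_function
import json
import urllib2
import aerospike

client = aerospike.client({'hosts': [('127.0.0.1', 3000)]}).connect()
key = ('ip', 'hit', 'trial')

response = urllib2.urlopen('http://ip-api.com/json/8.8.8.8')
record = json.load(response)   # e.g. a dict with 'status', 'country', 'query', ...
client.put(key, record)        # bins are taken from the dict's keys
client.close()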

Loop through to change parameter in Python 2.7

So I have this code that creates an output in Excel.
What I want to do now is get the parameter (lid) in the payload to loop through a list of other IDs.
This list is stored in a txt file.
Can anyone modify my code to show me how to do that, please?
The text file has values
1654,
3457,
4327,
1234
(can also hard code these somewhere in the script if it is easier)
from __future__ import print_function
import sys
import csv
import collections
import itertools

try:
    import requests
    from requests import exceptions
    import base64
    import json
except ImportError as e:
    import requests
    from requests import exceptions
    import base64
    import json
    print("Import Error: %s" % e)

API_TOKEN = u''
b64token = base64.b64encode(bytes(API_TOKEN))
REST_BASE_URL = u'https://visdasa.dsds.com/rest/'

# API URL request examples (choose one)
REST_URL = u'rawdata/'
FULL_URL = REST_BASE_URL + REST_URL

def retrieve_data(api_url):
    try:
        # connect to the API and retrieve data
        bauth_header = {'Authorization': 'Basic ' + b64token.decode('UTF-8')}
        payload = {'start': '2014-08-01T00:00:01', 'stop': '2014- 8-01T23:59:59', 'category': 'ots', 'lid': '9263'}
        response = requests.get(api_url, headers=bauth_header, params=payload)
        # check the api response
        if response.status_code == requests.codes.ok:
            # Convert from json data
            json_data = json.loads(response.text)
            Header_String = "ID", "Site Name", "Network ID", "Network Lablel", "Company Branch ID", "Comapany Label", "Count", "timestamp", "ots_duration", "notsure1", "notsure2"
            for location_row in json_data["data"]["locations"]:
                Location_string = (location_row["id"], location_row["label"], location_row["site"]["network"]["id"], location_row["site"]["network"]["label"],
                                   location_row["site"]["id"], location_row["site"]["label"])
            try:
                with open('C:\\Users\\teddy\\Desktop\\party\\test.csv', 'w') as wFile:
                    writer = csv.writer(wFile, delimiter=',')
                    writer.write(Header_string)
                    for row in json_data["data"]["raw_data"]:
                        writer.writerow(row)
            except IOError as e:
                logger.error("I/O error({0}): {1}".format(e.errno, e.strerror))
                print("I/O error({0}): {1}".format(e.errno, e.strerror))
        else:
            json_data = json.loads(response.text)
            # If not successful api call the throw an error
            raise requests.RequestException("Error with the api. Status code : %i \n Json response: %s"
                                            % (response.status_code, json_data))
    except (requests.exceptions.ProxyError, requests.RequestException) as e:
        print(e)

def main():
    # retrieve_data(FULL_URL, PROXY_SETTINGS)
    retrieve_data(FULL_URL)
    sys.exit()

if __name__ == '__main__':
    main()
Why not just pass each lid value as a parameter to your retrieve_data function?
def retrieve_data(api_url):
would become
def retrieve_data(api_url, lid_value):
You would remove the hardcoded lid section of your payload so the payload would look like this
payload = {'start': '2014-08-01T00:00:01', 'stop': '2014- 8-01T23:59:59','category': 'ots'}
Then on the next line you can add
payload['lid'] = lid_value
In your main function you could then loop through the values in the text file. Here is a simple loop with a hard-coded list; a sketch that reads from the text file follows it.
def main():
    lid_values = ['1654', '3457', '4327', '1234']
    for lid in lid_values:
        retrieve_data(FULL_URL, lid)
    sys.exit()
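And a sketch of main() that reads the IDs from the text file instead (the filename lids.txt is a placeholder; the strip calls remove the trailing commas and whitespace shown in the question):

def main():
    # read one ID per line, dropping blank lines and trailing commas
    with open('lids.txt') as f:
        lid_values = [line.strip().rstrip(',') for line in f if line.strip()]
    for lid in lid_values:
        retrieve_data(FULL_URL, lid)
    sys.exit()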

Python HTTP Request

I used this script
from twisted.internet import reactor, threads
from urlparse import urlparse
import httplib
import itertools

concurrent = 200
finished = itertools.count(1)
reactor.suggestThreadPoolSize(concurrent)

def getStatus(ourl):
    url = urlparse(ourl)
    conn = httplib.HTTPConnection(url.netloc)
    conn.request("HEAD", url.path)
    res = conn.getresponse()
    return res.status

def processResponse(response, url):
    print response, url
    processedOne()

def processError(error, url):
    print "error", url  #, error
    processedOne()

def processedOne():
    if finished.next() == added:
        reactor.stop()

def addTask(url):
    req = threads.deferToThread(getStatus, url)
    req.addCallback(processResponse, url)
    req.addErrback(processError, url)

added = 0
for url in open('urllist.txt'):
    added += 1
    addTask(url.strip())

try:
    reactor.run()
except KeyboardInterrupt:
    reactor.stop()
When I try to run the script with $ python test.py, it just prints the URLs and does not actually send the HTTP requests.
How could I send the HTTP request (or an equivalent cURL-style request) for each one?
Thanks
This should work if your URLs do not contain 'http://'. However, if they do contain 'http://', there is a solution for that in the comments:
import httplib

def requester(url):
    host = url.split('/')[0]
    # if urls do contain 'http://' --> host = url.split('/')[2].replace('http://','')
    req = url[url.find(host) + len(host):]
    conn = httplib.HTTPConnection(host)
    conn.request("HEAD", "/" + req)
    response = conn.getresponse()
    print response.status, response.reason
    # if you want data...
    # data = response.read()
    # print data

for url in open('urls.txt'):
    try:
        requester(url.strip())
    except Exception, e:
        print e
Furthermore, I recommend checking out the httplib documentation.
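As a variant, a sketch that uses urlparse (as the script in the question already does) so the host/path split works whether or not the URL carries an 'http://' scheme:

import httplib
from urlparse import urlparse

def requester(url):
    # prepend a scheme if missing so urlparse fills in netloc correctly
    parsed = urlparse(url if '://' in url else 'http://' + url)
    conn = httplib.HTTPConnection(parsed.netloc)
    conn.request("HEAD", parsed.path or "/")
    response = conn.getresponse()
    print response.status, response.reason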
Tested code, using inlineCallbacks and deferToThread. Also using defer.gatherResults to know when all the deferreds have been processed (instead of the counter method in the OP):
from twisted.internet import reactor, defer, utils
from twisted.internet.threads import deferToThread
from urlparse import urlparse
import httplib

threadDeferred = deferToThread.__get__

@threadDeferred
def get_url_head(url_arg):
    url = urlparse(url_arg)
    conn = httplib.HTTPConnection(url.netloc)
    conn.request("HEAD", url.path)
    res = conn.getresponse()
    conn.close()
    return res.status

@defer.inlineCallbacks
def check_url(sem, url_arg):
    yield sem.acquire()
    try:
        result = yield get_url_head(url_arg)
        defer.returnValue(result)
    finally:
        sem.release()

@defer.inlineCallbacks
def run(reactor, SEMAPHORE_SIZE=10):
    sem = defer.DeferredSemaphore(SEMAPHORE_SIZE)
    deferreds = []
    failed_urls = []
    responded_urls = []
    with open('urllist.txt', 'r') as f:
        for line in f:
            url_arg = line.strip()
            d = check_url(sem, url_arg)
            d.addCallback(processResult, url_arg, responded_urls).addErrback(processErr, url_arg, failed_urls)
            deferreds.append(d)
    res = yield defer.gatherResults(deferreds)
    # Do something else with failed_urls and responded_urls
    reactor.callLater(0, reactor.stop)

def main():
    from twisted.internet import reactor
    reactor.callWhenRunning(run, reactor)
    reactor.run()

def processResult(result, url_arg, responded_urls):
    print "Response %s from %s" % (result, url_arg)
    responded_urls.append((url_arg, result))

def processErr(err, url_arg, failed_urls):
    print "Error checking %s: %s" % (url_arg, repr(err.value))
    failed_urls.append((url_arg, err.value))

if __name__ == '__main__':
    main()
