HTTP webserver simply stops responding - Python

Earlier I wrote a multi-threaded web server, which at times would simply stop processing requests and would also get terminated at peak times.
I've implemented the same OpenCV-based processing in a Python WebSocket server too, which is working fine.
For very old browsers, I also need POST-based processing through a web server. I converted the server from multi-threaded to single-threaded, but it still stops at random times without printing any log output.
I checked syslog but found no clue. More than a week has gone by without finding a solution. I suspect something related to the DigitalOcean VPS or the network.
I have this code and can't figure out why it should stop responding:
from http.server import HTTPServer, BaseHTTPRequestHandler
import threading
import cgi
import tempfile
import resource
import base64
from common import *
from datetime import datetime

print(datetime.now())

gg_hashmap = getHash()
USE_HTTPS = True

def dump(obj):
    for attr in dir(obj):
        print("obj.%s = %r" % (attr, getattr(obj, attr)))

class PostHandler(BaseHTTPRequestHandler):
    def handle(self):
        try:
            BaseHTTPRequestHandler.handle(self)
        except:
            pass

    def do_POST(self):
        try:
            print("new req=" + str(datetime.now()), flush=True)
            form = cgi.FieldStorage(
                fp=self.rfile,
                headers=self.headers,
                environ={'REQUEST_METHOD': 'POST',
                         'CONTENT_TYPE': self.headers['Content-Type'],
                         })
            self.send_response(200)
            self.send_header("Content-type", "text/html")
            self.send_header("Access-Control-Allow-Origin", "*")
            self.end_headers()
            for field in form.keys():
                field_item = form[field]
                if field_item.filename:
                    file_data = field_item.file.read()
                    file_len = len(file_data)
                    del file_data
                    self.wfile.write('\tUploaded %s as "%s" (%d bytes)\n' %
                                     (field, field_item.filename, file_len))
                else:
                    pass
            if 'base64' in form and 'license' in form:
                print("license=", form['license'].value)
                global gg_hashmap
                file_content = form['base64'].value
                try:
                    f, temp_file_path = tempfile.mkstemp(prefix='sand', suffix='jpg')
                    os.close(f)
                    with open(temp_file_path, 'wb') as w:
                        w.write(base64.b64decode(file_content))
                    input_hashes = get_input_img(temp_file_path)
                    all_letters = ""
                    if input_hashes != None:
                        for inp_hash in input_hashes:
                            lowest = 1000
                            lowest_letter = ''
                            for letter, arr in gg_hashmap.items():
                                for hashval in arr:
                                    if int(inp_hash - hashval) < lowest:
                                        lowest = int(inp_hash - hashval)
                                        lowest_letter = letter
                            all_letters += lowest_letter
                    self.wfile.write(bytes(all_letters, "utf8"))
                except Exception as e:
                    print("exception3 caught")
                    print(e)
                    print(str(e))
            return
        except Exception as e:
            print("Caught unknown exception", e)

    def do_GET(self):
        self.send_response(200)
        self.end_headers()
        message = threading.currentThread().getName()
        self.wfile.write(bytes(message, 'utf-8'))
        self.wfile.write('\n')
        return
        form = cgi.FieldStorage(
            fp=self.rfile,
            headers=self.headers,
            environ={'REQUEST_METHOD': 'POST',
                     'CONTENT_TYPE': self.headers['Content-Type'],
                     })
        self.send_response(200)
        self.end_headers()
        for field in form.keys():
            field_item = form[field]
            if field_item.filename:
                file_data = field_item.file.read()
                file_len = len(file_data)
                del file_data
                self.wfile.write('\tUploaded %s as "%s" (%d bytes)\n' %
                                 (field, field_item.filename, file_len))
            else:
                pass
        return

def run():
    # resource.setrlimit(resource.RLIMIT_STACK, (2**29, -1))
    # threading.stack_size(24*1048576)
    server = HTTPServer(('0.0.0.0', 443), PostHandler)
    if USE_HTTPS:
        import ssl
        server.socket = ssl.wrap_socket(server.socket, keyfile='./ssl/key.pem', certfile='./ssl/public.pem',
                                        ca_certs="./ssl/cap1_transactionfailed_com.ca-bundle", server_side=True)
    server.serve_forever()

if __name__ == '__main__':
    run()

I don't think anyone will want to read through all 157 lines of convoluted HTTP request handling code (some of which isn't even posted, hidden behind from common import *) to try and decipher why it might stop at some given time.
It's likely not the answer you want to hear, but HTTPServer really isn't what anyone uses in production for Python.
You should look into rewriting your code with either (my recommendations at the time of writing):
FastAPI (or its underlying Starlette framework) on Uvicorn (Uvicorn will let you do the websocket stuff in the same process), or
Flask on Gunicorn or uWSGI.
For instance, here's a rough approximation of what your code would look like with Starlette. (There may be bugs since it's dry-coded, and it's certainly not fully async, but that doesn't matter here.)
import tempfile
import base64

from starlette.applications import Starlette
from starlette.requests import Request
from starlette.responses import PlainTextResponse

app = Starlette()

def get_all_letters(input_hashes):
    all_letters = ""
    if input_hashes:
        for inp_hash in input_hashes:
            lowest = 1000
            lowest_letter = ""
            for letter, arr in gg_hashmap.items():
                for hashval in arr:
                    if int(inp_hash - hashval) < lowest:
                        lowest = int(inp_hash - hashval)
                        lowest_letter = letter
            all_letters += lowest_letter
    return all_letters

@app.route("/", methods=["GET", "POST"])
async def handle(request: Request):
    if request.method == "GET":
        return PlainTextResponse("Hello!")
    form = await request.form()
    if not ("base64" in form and "license" in form):
        return PlainTextResponse("Missing data!", status_code=400)
    with tempfile.NamedTemporaryFile(prefix="sand", suffix="jpg") as f:
        content = await form["base64"].read()
        f.write(base64.b64decode(content))
        f.flush()
        input_hashes = get_input_img(f)
        if not input_hashes:
            return PlainTextResponse("No input hashes!", status_code=400)
        all_letters = get_all_letters(input_hashes)
        return PlainTextResponse(all_letters)
You could then run this using Uvicorn (which will also handle all of that HTTPS stuff for you).
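For example, the server above could be started with something like this (a sketch: the module name app_module is a placeholder, and the certificate paths are the ones from your question):

uvicorn app_module:app --host 0.0.0.0 --port 443 --ssl-keyfile ./ssl/key.pem --ssl-certfile ./ssl/public.pem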

With mkstemp you must delete the temp file yourself, and the code shown never does, so you probably run out of disk space or max out the number of files in the temp directory. As AKX mentioned, though, you should look into using a more robust HTTP server; even if the temp-file leak isn't your problem, there are numerous other issues that can come up when using a non-production HTTP server.
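For instance, a minimal cleanup pattern (a sketch based on the mkstemp call in the question) looks like this:

import os
import tempfile

f, temp_file_path = tempfile.mkstemp(prefix='sand', suffix='jpg')
try:
    os.close(f)
    # ... write the decoded image to temp_file_path and process it ...
finally:
    os.remove(temp_file_path)  # mkstemp never deletes the file for you

Alternatively, tempfile.NamedTemporaryFile (as used in AKX's rewrite) removes the file automatically when the context manager exits.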

Related

Redirect instead of HTTP403

I'm trying to make a proxy with a blocklist. I initially responded with 403 when a user went to a blocked page. However, that doesn't work with HTTPS and returns ERR_TUNNEL_CONNECTION_FAILED, as explained in Respond with 403 in an HTTPS proxy.
Thus, I want to redirect the user to an HTML page like this.
This is my code:
import socket
import threading
import signal
import sys
import fnmatch
import errno
import time
import pdb
import re
from time import gmtime, strftime, localtime
import logging

import config
import rule
import tag as tag_store
from Ignite import flame

core = flame()
p = re.compile(r'(http:\/\/)?([\w\.-]*)(\:(\d*))?(\/.*)?')
thread_logger = logging.getLogger('thread')
access_logger = logging.getLogger('access')
csv_logger = logging.getLogger('csv')

def proxy(browser_conn, client_addr):
    print("hi")

    def ishostAllowed(host):
        print("\n\nHost:")
        print(str(host))
        access_logger.info(str(host))
        if host.split('.')[-1].isdigit():
            thread_logger.warn("Invalid host: {0}".format(host), extra=req)
            return core.check_allow(host)
        #pdb.set_trace()
        tags = tag_store.get(host)
        if not tags:
            thread_logger.warn("{0} isn't allowed: empty tags".format(host), extra=req)
            return core.check_allow(host)
        for tag in tag_store.get(host):
            if not rule.isTagAllowed(tag):
                thread_logger.warn("{0}:{1} isn't allowed".format(host, tag), extra=req)
                return core.check_allow(host)
        return core.check(host)

    def proxy_http(request):
        try:
            # create a socket to connect to the web server
            #pdb.set_trace()
            server_conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            server_conn.settimeout(config.connection_timeout)
            server_conn.connect((request['host'], request['port']))
            server_conn.sendall(request['raw_data'])  # send request to webserver
            while 1:
                data = server_conn.recv(config.max_request_len)  # receive data from web server
                if len(data) > 0:
                    browser_conn.send(data)  # send to browser
                else:
                    break
        except socket.error as error_msg:
            thread_logger.error(str(error_msg) + ":" + str(request), extra=req)
        finally:
            if server_conn:
                server_conn.close()
            if browser_conn:
                browser_conn.close()
        return

    def response(status, message):
        reply = "HTTP/1.1 {0} {1}\r\n"
        reply += "Proxy-agent: Sinktrap\r\n"
        reply += "\r\n"
        reply = reply.format(status, message)
        #pdb.set_trace()
        browser_conn.sendall(reply.encode())

    def proxy_https(request):
        #pdb.set_trace()
        try:
            server_conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            # If successful, send 200 code response
            server_conn.connect((req['host'], req['port']))
            response(200, 'Connection established')
        except socket.error as err:
            # If the connection could not be established, exit
            # Should properly handle the exit with http error code here
            thread_logger.error("Cannot establish https connection:" + str(err), extra=req)
            if server_conn:
                server_conn.close()
            if browser_conn:
                browser_conn.close()
            return
        # Indiscriminately forward bytes
        browser_conn.setblocking(0)
        server_conn.setblocking(0)
        timeout = time.time() + 60  # 1 minute
        while timeout - time.time() > 0:
            request_done = False
            replied_done = False
            try:
                request = browser_conn.recv(config.max_request_len)  # receive data from browser
                if len(request) > 0:
                    server_conn.sendall(request)  # send to web server
                else:
                    request_done = True
                #thread_logger.info("REQUEST len: " + str(len(request)), extra=req)
            except socket.error as e:
                if e.errno == errno.EWOULDBLOCK:
                    time.sleep(0.1)
                    pass
                else:
                    thread_logger.error("pipe error:" + str(e), extra=req)
                    break
            try:
                reply = server_conn.recv(config.max_request_len)  # receive data from web server
                if len(reply) > 0:
                    browser_conn.sendall(reply)  # send to browser
                else:
                    replied_done = True
                #thread_logger.info("reply len: " + str(len(reply)), extra=req)
            except socket.error as e:
                if e.errno == errno.EWOULDBLOCK:
                    time.sleep(0.1)
                    pass
                else:
                    thread_logger.error("pipe error:" + str(e), extra=req)
                    break
            if request_done and replied_done:
                break
        server_conn.close()
        browser_conn.close()

    raw_data = browser_conn.recv(config.max_request_len)  # get the request from browser
    req = {'raw_data': raw_data,
           'tname': threading.currentThread().getName(),
           'client_ip': client_addr[0],
           'client_port': client_addr[1]
           }
    thread_logger.info("REQUEST: {0}".format(raw_data), extra=req)
    #pdb.set_trace()
    try:
        # request_line is the first one. https://www.w3.org/Protocols/rfc2616/rfc2616-sec5.html
        msg_body_pos = len(raw_data)
        for i in range(4, len(raw_data)):
            if raw_data[i-4:i].decode() == '\r\n\r\n':
                msg_body_pos = i
                break
        lines = raw_data[:msg_body_pos-4].decode('utf-8').split('\r\n')
        if len(lines[0]) < 16:
            thread_logger.warn("INVALID REQUEST:{0}".format(raw_data), extra=req)
            return
        headers = {k: v for k, v in (x.split(':', 1) for x in lines[1:])}
        if 'Referer' in headers:
            req['Referer'] = headers['Referer']
        else:
            req['Referer'] = ''
        if 'User-Agent' in headers:
            req['User-Agent'] = headers['User-Agent']
        else:
            req['User-Agent'] = ''
        req['request_line'] = lines[0]
        req['method'], req['request_uri'], req['http_version'] = lines[0].split(' ')
        # check if the first line is a valid request; request_line might be empty
        if not req['method'] or not req['request_uri'] or not req['http_version']:
            thread_logger.warn("INVALID REQUEST:{0}".format(raw_data), extra=req)
            return
    except Exception as e:
        thread_logger.error("INVALID REQUEST:{0} {1}".format(e, raw_data), extra=req)
        logging.exception("INVALID REQUEST")
        return
    access_logger.info("", extra=req)
    #pdb.set_trace()
    m = p.match(req['request_uri'])
    req['host'] = m.group(2)
    req['port'] = int(m.group(4)) if m.group(4) else 80
    # Check if request is allowed or not
    if not ishostAllowed(req['host']):
        csv_logger.info("blocked", extra=req)
        thread_logger.warn("Block REQUEST:{0}".format(raw_data), extra=req)
        response(403, "The website has been blocked by Ignite's proxy.")
        #Breq = req
        #Breq['host'] = "azlancoding.github.io/Blocked"
        #proxy_https(Breq)
        #response(307,"https://azlancoding.github.io/BLOCKED")
        return
    csv_logger.info("allowed", extra=req)
    #pdb.set_trace()
    if req['method'] == 'CONNECT':
        proxy_https(req)
    else:
        proxy_http(req)
The original proxy is pcxy. See my GitHub project here.
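As a side note on the redirect idea: for plain HTTP requests, the response() helper above could be extended with a Location header, since a redirect is just a status code plus that header. A hedged sketch (the redirect helper and target URL are illustrative, taken from the commented-out code):

def redirect(location):
    reply = "HTTP/1.1 307 Temporary Redirect\r\n"
    reply += "Proxy-agent: Sinktrap\r\n"
    reply += "Location: {0}\r\n".format(location)  # where the browser should go instead
    reply += "\r\n"
    browser_conn.sendall(reply.encode())

For CONNECT (HTTPS) requests this won't help, though: as the linked question explains, browsers treat any non-200 answer to CONNECT as a tunnel failure.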

Search haveibeenpwned for all emails on a domain

I am able to use haveibeenpwned to search for a single compromised account. However, I could not find an option to use the API key to search for compromises of all the email accounts on a domain. (For example, if the domain is xyz.com, I want to search for the compromise of abc@xyz.com, peter.charlie@xyz.com, and so on.) I am aware of the notification email that I can sign up for, but that is a lengthy process and I prefer using the API.
So I wrote a script to search haveibeenpwned for all the email addresses of my domain, but it takes very long. I searched through a couple of GitHub projects, but I did not find any such implementation. Has anyone tried this before?
I have added the code below. I am using a multi-threading approach, but it still takes very long. Is there any other optimization strategy I can use? Please help. Thank you.
import requests, json
import threading
from time import sleep
import datetime
import splunklib.client as client
import splunklib.results as results
from itertools import islice
import linecache
import sys

date = datetime.datetime.now()

def PrintException():
    exc_type, exc_obj, tb = sys.exc_info()
    f = tb.tb_frame
    lineno = tb.tb_lineno
    filename = f.f_code.co_filename
    linecache.checkcache(filename)
    line = linecache.getline(filename, lineno, f.f_globals)
    print 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj)

class myThread(threading.Thread):
    def __init__(self, threadID, name, list_emails):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.list_emails = list_emails

    def run(self):
        i = 0
        print "Starting " + self.name
        for email in self.list_emails:
            print i
            i = i + 1
            result = check_pasteaccount(email)
            print email
            print result
            print result
        print "Exiting " + self.name

def check_pasteaccount(account):
    account = str(account)
    result = ""
    URL = "https://haveibeenpwned.com/api/v3/pasteaccount/%s?truncateResponse=false" % (account)
    # print(URL)
    headers = {'hibp-api-key': api_key}
    result = ""
    try:
        r = requests.get(url=URL, headers=headers)
        # sleep(2)
        status_code = r.status_code
        if status_code == 200:
            data = r.text
            result = []
            for entry in json.loads(data.decode('utf8')):
                if int((date - datetime.datetime.strptime(entry['Date'], '%Y-%m-%dT%H:%M:%SZ')).days) > 120:
                    pass
                else:
                    result.append(['Title: {0}'.format(entry['Title']),
                                   'Source: {0}'.format(entry['Source']),
                                   'Paste ID: {0}'.format(entry['Id'])])
            if len(result) == 0:
                result = "No paste reported for given account and time frame."
            else:
                paste_result = ""
                for entry in result:
                    for item in entry:
                        paste_result += str(item) + "\r\n"
                    paste_result += "\r\n"
                result = paste_result
        elif status_code == 404:
            result = "No paste for the account"
        else:
            if status_code == 429:
                sleep(5)
                # print "Limit exceeded, sleeping"
                result = check_pasteaccount(account)
            else:
                result = "Exception"
                print status_code
    except Exception as e:
        result = "Exception"
        PrintException()
        pass
    return result

def split_every(n, iterable):
    iterable = iter(iterable)
    for chunk in iter(lambda: list(islice(iterable, n)), []):
        yield chunk

def main():
    print datetime.datetime.now()
    # Fetching the list of email addresses from Splunk
    list_emails = connect_splunk()
    print datetime.datetime.now()
    i = 0
    list_split = split_every(1000, list_emails)
    threads = []
    for list in list_split:
        i = i + 1
        thread_name = "Thread" + str(i)
        thread = myThread(1, thread_name, list)
        thread.start()
        threads.append(thread)
    # Wait for all the threads to complete
    for t in threads:
        t.join()
    print "Completed Search"
Here's a shorter and maybe more efficient version of your script using the standard multiprocessing library instead of a hand-rolled thread system.
You'll need Python 3.6+ since we're using f-strings.
You'll need to install the tqdm module for fancy progress bars.
You can adjust the number of concurrent requests with the pool size parameter.
Output is written in machine-readable JSON Lines format into a timestamped file.
A single requests session is shared (per-worker), which means less time spent connecting to HIBP.
import datetime
import json
import multiprocessing
import random
import time

import requests
import tqdm

HIBP_PARAMS = {
    "truncateResponse": "false",
}
HIBP_HEADERS = {
    "hibp-api-key": "xxx",
}

sess = requests.Session()

def check_pasteaccount(account):
    while True:
        resp = sess.get(
            url=f"https://haveibeenpwned.com/api/v3/pasteaccount/{account}",
            params=HIBP_PARAMS,
            headers=HIBP_HEADERS,
        )
        if resp.status_code == 429:
            print("Quota exceeded, waiting for a while")
            time.sleep(random.uniform(3, 7))
            continue
        if resp.status_code >= 400:
            return {
                "account": account,
                "status": resp.status_code,
                "result": resp.text,
            }
        return {
            "account": account,
            "status": resp.status_code,
            "result": resp.json(),
        }

def connect_splunk():
    # TODO: return emails
    return []

def main():
    list_emails = [str(account) for account in connect_splunk()]
    datestamp = datetime.datetime.now().isoformat().replace(":", "-")
    output_filename = f"accounts-log-{datestamp}.jsonl"
    print(f"Accounts to look up: {len(list_emails)}")
    print(f"Output filename: {output_filename}")
    with multiprocessing.Pool(processes=16) as p:
        with open(output_filename, "a") as f:
            results_iterable = p.imap_unordered(
                check_pasteaccount, list_emails, chunksize=20
            )
            for result in tqdm.tqdm(
                results_iterable,
                total=len(list_emails),
                unit="acc",
                unit_scale=True,
            ):
                print(json.dumps(result, sort_keys=True), file=f)

if __name__ == "__main__":
    main()

Why is h.getresponse() needed in Python logging HTTPHandler?

I overrode the emit method of Python logging's HTTPHandler to adapt it to my needs, and I noticed the line
h.getresponse() #can't do anything with the result
Why is this line necessary?
I noticed that removing this line has no effect when using insecure logging, but makes the logs fail when using a secure connection.
def emit(self, record):
    """
    Emit a record.

    Send the record to the Web server as a percent-encoded dictionary
    """
    try:
        import http.client, urllib.parse
        host = self.host
        if self.secure:
            h = http.client.HTTPSConnection(host, context=self.context)
        else:
            h = http.client.HTTPConnection(host)
        url = self.url
        data = urllib.parse.urlencode(self.mapLogRecord(record))
        if self.method == "GET":
            if (url.find('?') >= 0):
                sep = '&'
            else:
                sep = '?'
            url = url + "%c%s" % (sep, data)
        h.putrequest(self.method, url)
        # support multiple hosts on one IP address...
        # need to strip optional :port from host, if present
        i = host.find(":")
        if i >= 0:
            host = host[:i]
        # See issue #30904: putrequest call above already adds this header
        # on Python 3.x.
        # h.putheader("Host", host)
        if self.method == "POST":
            h.putheader("Content-type",
                        "application/x-www-form-urlencoded")
            h.putheader("Content-length", str(len(data)))
        if self.credentials:
            import base64
            s = ('%s:%s' % self.credentials).encode('utf-8')
            s = 'Basic ' + base64.b64encode(s).strip().decode('ascii')
            h.putheader('Authorization', s)
        h.endheaders()
        if self.method == "POST":
            h.send(data.encode('utf-8'))
        h.getresponse()    # can't do anything with the result
    except Exception:
        self.handleError(record)
The getresponse() call guarantees that the request is actually sent to the server: it blocks until the server's response arrives, and the server can only respond after it has received the complete request. Without it, the connection can be closed (and garbage-collected) as soon as emit() returns, possibly before the request has been fully transmitted. Plain HTTP usually survives that because pending bytes are still flushed when the socket closes, but a TLS connection torn down early loses the data, which is most likely why only your secure logging fails.
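The same pattern shows up in plain http.client use; here is a small sketch (the host example.com and the /log path are placeholders, not part of HTTPHandler):

import http.client

conn = http.client.HTTPSConnection("example.com")
conn.request("POST", "/log", body=b"msg=hello",
             headers={"Content-Type": "application/x-www-form-urlencoded"})
resp = conn.getresponse()  # blocks until the server replies, so the request
resp.read()                # is known to have been fully delivered
conn.close()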

http.server if statement os.path.isfile() doesn't work

So this is my code:
from http.server import BaseHTTPRequestHandler, HTTPServer
import requests, json, os

PORT = 1337

class getHandler(BaseHTTPRequestHandler):
    def handleJSON(self, provider, data):
        if provider == "provider_1":
            json_data = json.loads(data)
            sl_token = json_data["access_token"]
            return sl_token
        elif provider == "provider_2":
            json_data = json.loads(data)
            pb_token = json_data["access_token"]
            return pb_token

    def do_GET(self):
        data = self.requestline
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        self.wfile.write(b'You may close the tab now')
        print("Raw Data: " + data)
        if not os.path.isfile("PbToken.txt") and os.path.isfile("SlToken.txt"):
            if "GET /?code=" and "&state=" in data:  # Provider_1
                print("Provider_1 Data:", data)
                pb_code = data[data.find("/?code=") + len("/?code="):data.find("&state=")]
                with open("PbToken.txt", "w") as file:
                    file.write(pb_code)
                file.close()
            elif "GET /?code=" in data:  # Provider_2
                print("Provider_2 Data:", data)
                sl_code = data.strip()
                sl_code = sl_code[sl_code.rindex("/?code=") + len("/?code="):sl_code.rindex(" ")]
                with open("SlToken.txt", "w") as file:
                    file.write(sl_code)
                file.close()
        else:
            raise SystemExit

server = HTTPServer(('localhost', PORT), getHandler)
print("Started server on port", PORT)
server.serve_forever()
So, in do_GET in the getHandler class, execution never makes it past the if not os.path.isfile("PbToken.txt") and os.path.isfile("SlToken.txt"): statement (I've of course made sure the files aren't actually there). I want it to check whether both of the files exist: if they don't, do what's written below; if they do exist, it should go straight to the else branch, which uses raise SystemExit. What am I doing wrong?
It should've been or, not and. Because not binds more tightly than and, your condition parses as (not os.path.isfile("PbToken.txt")) and os.path.isfile("SlToken.txt"), which is only true when the first file is missing and the second one exists.
So simply use if not os.path.isfile("PbToken.txt") or not os.path.isfile("SlToken.txt"): to see if one of the files is missing.
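Equivalently, you can negate the conjunction as a whole; by De Morgan's law the two forms behave identically:

import os

# not (A and B) is the same as (not A) or (not B)
if not (os.path.isfile("PbToken.txt") and os.path.isfile("SlToken.txt")):
    pass  # at least one token file is missing: fetch and save the codes here
else:
    raise SystemExit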

connect requests and responses in twisted proxy

I am trying to log requests and responses, including the headers of both, using a proxy built on the Twisted framework.
Right now the requests are handled by the process method of the HTTPProxyRequest class, and the responses are handled in HTTPProxyClient.
I want to know which response corresponds to which request.
What is the least hackish way of doing this?
from twisted.python import log
from twisted.web import http
from twisted.web.proxy import Proxy, ProxyRequest, ProxyClientFactory, ProxyClient

class HTTPProxyClient(ProxyClient):
    def __init__(self, command, rest, version, headers, data, father):
        ProxyClient.__init__(self, command, rest, version, headers, data, father)
        self.new_buffer = ""
        self.new_headers = {}

    def handleHeader(self, key, value):
        self.new_headers[key] = value
        ProxyClient.handleHeader(self, key, value)

    def handleResponsePart(self, buffer):
        # log.msg("RESPONSE_CONTENT: %s" % buffer)
        ProxyClient.handleResponsePart(self, buffer)
        self.new_buffer += buffer

    def handleResponseEnd(self):
        log.msg("RESPONSE_HEADERS:%s \n RESPONSE_CONTENT: %s" % (str(self.new_headers), self.new_buffer))
        ProxyClient.handleResponseEnd(self)

class HTTPProxyFactory(ProxyClientFactory):
    protocol = HTTPProxyClient

class HTTPProxyRequest(ProxyRequest):
    protocols = {'http': HTTPProxyFactory}

    def process(self):
        log.msg("REQUEST_METHOD:", self.method)
        # log.msg(self.content.read())
        for k, v in self.requestHeaders.getAllRawHeaders():
            # log.msg("REQUEST_HEADER: %s : %s" % (k, v))
            pass
        # log.msg("\n \n")
        if self.method == "POST":
            log.msg("LOGGING POST CONTENT:", self.content.read())
        ProxyRequest.process(self)

class HTTPProxy(Proxy):
    requestFactory = HTTPProxyRequest

if __name__ == '__main__':  # $ python proxy_modify_request.py
    import sys
    from twisted.internet import reactor

    log.startLogging(sys.stdout)
    factory = http.HTTPFactory()
    factory.protocol = HTTPProxy
    reactor.listenTCP(8070, factory)
    reactor.run()
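One low-hackery option (a sketch, not tested): ProxyClient already receives the originating request as the father argument and stores it on self.father, so the response-side handlers can tag their log lines with the request they belong to:

class HTTPProxyClient(ProxyClient):
    # __init__, handleHeader, handleResponsePart as above

    def handleResponseEnd(self):
        # self.father is the ProxyRequest this response answers, so the
        # request line and the response can be logged together
        log.msg("REQUEST %s %s -> RESPONSE_HEADERS: %s\nRESPONSE_CONTENT: %s"
                % (self.father.method, self.father.uri,
                   str(self.new_headers), self.new_buffer))
        ProxyClient.handleResponseEnd(self)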
