Python socket multiprocessing pool of workers

I need to accept connections on a socket, read the input data, do a long, heavy calculation and then send back an answer. There may be many queries at the same time (e.g. 100).
I understand that because of the GIL I can't use normal threads, so I tried C++ with boost::threads and boost::python, running a Python subinterpreter in each thread. But even then, all cores were not utilised 100% at the same time.
So I decided to use multiprocessing, but with a static pool of workers serving these requests from a queue. This way, we don't waste time forking a process per request, and we never have 100 or more processes at the same time, only a fixed count.
I am new to Python; mostly I have used C++.
So now I have this code, but it is not working. The connection opens and immediately closes, and I don't know why:
#!/usr/bin/env python
import os
import sys
import SocketServer
import Queue
import time
import socket
import multiprocessing
from multiprocessing.reduction import reduce_handle
from multiprocessing.reduction import rebuild_handle
class MultiprocessWorker(multiprocessing.Process):

    def __init__(self, sq):
        self.SLEEP_INTERVAL = 1

        # base class initialization
        multiprocessing.Process.__init__(self)

        # job management stuff
        self.socket_queue = sq
        self.kill_received = False

    def run(self):
        while not self.kill_received:
            try:
                h = self.socket_queue.get_nowait()
                fd = rebuild_handle(h)
                client_socket = socket.fromfd(fd, socket.AF_INET, socket.SOCK_STREAM)
                #client_socket.send("hellofromtheworkerprocess\r\n")
                received = client_socket.recv(1024)
                print "Received on client: ", received
                client_socket.close()
            except Queue.Empty:
                pass

            #Dummy timer
            time.sleep(self.SLEEP_INTERVAL)
class MyTCPHandler(SocketServer.BaseRequestHandler):
    """
    The RequestHandler class for our server.

    It is instantiated once per connection to the server, and must
    override the handle() method to implement communication to the
    client.
    """

    def handle(self):
        # self.request is the TCP socket connected to the client
        #self.data = self.request.recv(1024).strip()
        #print "{} wrote:".format(self.client_address[0])
        #print self.data
        # just send back the same data, but upper-cased
        #self.request.sendall(self.data.upper())

        #Either pipe it to the worker directly like this
        #pipe_to_worker.send(h) #instance of multiprocessing.Pipe
        #or use a Queue :)
        h = reduce_handle(self.request.fileno())
        socket_queue.put(h)
if __name__ == "__main__":

    # Main process
    address = ('localhost', 8082)
    server = SocketServer.TCPServer(address, MyTCPHandler)

    socket_queue = multiprocessing.Queue()

    for i in range(5):
        worker = MultiprocessWorker(socket_queue)
        worker.start()

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        sys.exit(0)

Is there a reason why you do not use something like

def reduce_socket(s):
    ...

def rebuild_socket(ds):
    ...

? It seems like you could do this:

import copyreg
copyreg.pickle(socket.socket, reduce_socket, rebuild_socket)

and then pass the socket to the queue.
These are suggestions. Do they help?
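For illustration, here is a rough, untested sketch of what those two helpers could look like on Python 2, reusing the same reduce_handle/rebuild_handle calls the question already relies on (the helper names are just placeholders; on Python 2 the stdlib module is copy_reg rather than copyreg):

import copy_reg  # 'copyreg' on Python 3
import socket
from multiprocessing.reduction import reduce_handle, rebuild_handle

def reduce_socket(s):
    # Return a rebuild callable plus everything needed to recreate the
    # socket in another process: a duplicated handle, family and type.
    return rebuild_socket, (reduce_handle(s.fileno()), s.family, s.type)

def rebuild_socket(reduced_handle, family, type_):
    # Wrap the transferred handle in a new socket object.
    fd = rebuild_handle(reduced_handle)
    return socket.fromfd(fd, family, type_)

copy_reg.pickle(socket.socket, reduce_socket)

Python 2.7's multiprocessing also ships something close to this out of the box: calling multiprocessing.allow_connection_pickling() registers reducers so that socket and Connection objects can be passed through a Queue directly.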

Try this:

def handle(self):
    h = reduce_handle(self.request.fileno())
    socket_queue.put(h)
    self.request.close()

Note the self.request.close() addition.

Related

Is it possible for multiple python files to use one queue?

One script (datamanager.py):

from multiprocessing import Manager

q = Manager().Queue()

The other two scripts look like this:

from datamanager import q
import time

while True:
    time.sleep(1)
    q.put(1)

and:

from datamanager import q

while True:
    if not q.empty():
        data = q.get()
        print(data)

Is it possible to achieve this using only a queue, instead of a message queue such as Kafka?
An alternative to Ahmed's answer, which uses a simpler singleton, is below.
server.py:
from multiprocessing.managers import BaseManager
from multiprocessing import Queue

address = ('127.0.0.1', 50000)  # you can change this
authkey = b"abc"  # you should change this

class SharedQueue:
    def __init__(self):
        self._queue = Queue()
        self._queue.put("Something really important!")

    def __call__(self):
        return self._queue

if __name__ == "__main__":
    # Register our queue
    shared_queue = SharedQueue()
    BaseManager.register("get_queue", shared_queue)

    # Start server
    manager = BaseManager(address=address, authkey=authkey)
    srv = manager.get_server()
    srv.serve_forever()
client.py:

from server import address, authkey
from multiprocessing.managers import BaseManager

if __name__ == "__main__":
    BaseManager.register("get_queue")
    manager = BaseManager(authkey=authkey, address=address)
    manager.connect()
    queue = manager.get_queue()
    print(queue.get())
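To try this out, run server.py first so the manager is listening on address, then run client.py in a separate process; the client receives a proxy to the shared queue and prints the item server.py put into it.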
In order to keep the queue alive and not tied to either process, you need to spawn a server that manages it. This server should hold a singleton queue, and everyone that contacts it gets a proxy to that queue. The server code looks as follows:
# queue_server.py
from multiprocessing.managers import SyncManager
from multiprocessing.managers import BaseProxy
import multiprocessing

address = ('127.0.0.1', 50000)  # you can change this
authkey = b"abc"  # you should change this

class SingletonQueue:
    instance = None

    def __new__(cls, *args, **kwargs):
        if SingletonQueue.instance is None:
            SingletonQueue.instance = object.__new__(SingletonQueue)
            return SingletonQueue.instance
        else:
            return SingletonQueue.instance

    def get_queue(self):
        if not hasattr(self, "queue"):
            manager = SyncManager(address=address, authkey=authkey)
            manager.connect()
            self.queue = manager.Queue()
        return self.queue

class CustomQueueProxy(BaseProxy):
    _exposed_ = ['get_queue']

    def get_queue(self):
        queue = self._callmethod('get_queue')
        return queue

def connect_manager():
    multiprocessing.current_process().authkey = authkey
    manager = SyncManager(address=address, authkey=authkey)
    manager.register("SingletonQueue", SingletonQueue, CustomQueueProxy)
    manager.connect()
    return manager

def start_server():
    manager = SyncManager(address=address, authkey=authkey)
    manager.register("SingletonQueue", SingletonQueue, CustomQueueProxy)
    server = manager.get_server()
    print(f"running on ip = {server.address[0]}, and port {server.address[1]}")
    multiprocessing.current_process().authkey = authkey
    server.serve_forever()

if __name__ == "__main__":
    start_server()
You need to run the server first. Once it is running you can connect to it with a client; the client code will look like this:
import multiprocessing
import queue_server  # the server python file

manager = queue_server.connect_manager()
queue: multiprocessing.Queue = manager.SingletonQueue().get_queue()
queue.put(1)
print(queue.get())
Note that this sets the authentication key of your Python process to a fixed value, so you cannot use it to make multiple connections with different authentication keys; you have to use one fixed authentication key.
Edit: I'd probably go with Charchit Agarwal's answer if anyone is reading this in the future, or a mix of both answers, depending on whether you want to allow connections over network/docker boundaries, which my answer allows.

Python: Twisted server and values

I'd like access to the same value in my protocol and in my factory so I made a test with this code:
import time
from multiprocessing import Process
from twisted.internet import reactor, protocol

class MyServer(protocol.Protocol):
    def connectionMade(self):
        self.factory.clients.append("client")
        print self.factory.clients

class MyServerFactory(protocol.Factory):
    def __init__(self):
        self.protocol = MyServer
        self.clients = []

    def printClient(self):
        print self.clients

if __name__ == '__main__':
    factory = MyServerFactory()
    reactor.listenTCP(4433, factory)
    processTwisted = Process(target=reactor.run)
    processTwisted.start()
    time.sleep(10)
    factory.printClient()
During the sleep I connect a client to the server.
This is the console's log:
['client']
[]
And I expected:
['client']
['client']
How can I do it?
Twisted and multiprocessing don't work together like this, and neither do plain lists and multiprocessing: the child process gets its own copy of the factory, so appending to the clients list in the child never changes the list in the parent.
If you remove the use of multiprocessing you'll get the behavior you want.
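For comparison, a rough sketch (untested) of the same test with multiprocessing removed: the reactor runs in the main process and reactor.callLater replaces the time.sleep(10):

from twisted.internet import reactor, protocol

class MyServer(protocol.Protocol):
    def connectionMade(self):
        self.factory.clients.append("client")
        print self.factory.clients

class MyServerFactory(protocol.Factory):
    def __init__(self):
        self.protocol = MyServer
        self.clients = []

    def printClient(self):
        print self.clients

if __name__ == '__main__':
    factory = MyServerFactory()
    reactor.listenTCP(4433, factory)
    # Check the same factory object ten seconds later, inside the reactor,
    # instead of sleeping in a separate process.
    reactor.callLater(10, factory.printClient)
    reactor.run()

Because everything now happens in one process, both print statements see the same clients list.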

Gevent.monkey.patch_all breaks code that relies on socket.shutdown()

I'm currently working to add support for gevent-socketio to an existing Django project. I'm finding that the gevent.monkey.patch_all() call breaks the cancellation mechanism of a thread which is responsible for receiving data from a socket; we'll call the class SocketReadThread for now.
SocketReadThread is pretty simple: it calls recv() on a blocking socket. When it receives data it processes it and calls recv() again. The thread stops when an exception occurs or when recv() returns 0 bytes, as happens when socket.shutdown(SHUT_RDWR) is called in SocketReadThread.stop_reading().
The problem occurs when the gevent.monkey.patch_all() replaces the default socket implementation. Instead of shutting down nicely I get the following exception:
error: [Errno 9] File descriptor was closed in another greenlet
I'm assuming this is occurring because gevent makes my socket non-blocking in order to work its magic. This means that when I call socket.shutdown(socket.SHUT_RDWR) the greenlet that was doing the work for the monkey patched socket.recv call tried to read from the closed file descriptor.
I coded an example to isolate this issue:
from gevent import monkey
monkey.patch_all()

import socket
import sys
import threading
import time

class SocketReadThread(threading.Thread):
    def __init__(self, socket):
        super(SocketReadThread, self).__init__()
        self._socket = socket

    def run(self):
        connected = True
        while connected:
            try:
                print "calling socket.recv"
                data = self._socket.recv(1024)
                if (len(data) < 1):
                    print "received nothing, assuming socket shutdown"
                    connected = False
                else:
                    print "Received something: {}".format(data)
            except socket.timeout as e:
                print "Socket timeout: {}".format(e)
                connected = False
            except:
                ex = sys.exc_info()[1]
                print "Unexpected exception occurred: {}".format(str(ex))
                raise ex

    def stop_reading(self):
        self._socket.shutdown(socket.SHUT_RDWR)
        self._socket.close()

if __name__ == '__main__':
    sock = socket.socket()
    sock.connect(('127.0.0.1', 4242))
    st = SocketReadThread(sock)
    st.start()
    time.sleep(3)
    st.stop_reading()
    st.join()
If you open a terminal and run nc -lp 4242 & (to give this program something to connect to) and then run this program, you will see the exception mentioned above. If you remove the call to monkey.patch_all() you will see that it works just fine.
My question is: how can I support cancellation of the SocketReadThread in a way that works with or without gevent monkey patching, and doesn't require an arbitrary timeout that would make cancellation slow (i.e. calling recv() with a timeout and checking a conditional)?
I found that there were two different workarounds for this. The first was to simply catch and suppress the exception. This appears to work fine since it is common practice for one thread to close a socket in order to cause another thread to exit from a blocking read. I don't know why greenlets would complain about this other than as a debugging aid; it is really just an annoyance.
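For reference, a minimal sketch (untested) of that first workaround; it assumes the error gevent raises on shutdown surfaces as a socket.error, which may vary between gevent versions:

from gevent import monkey
monkey.patch_all()

import socket
import threading
import time

def read_until_closed(sock):
    while True:
        try:
            data = sock.recv(1024)
        except socket.error as e:
            # Another thread/greenlet shut the socket down; treat it as EOF.
            print "recv interrupted by shutdown: {}".format(e)
            break
        if len(data) < 1:
            print "received nothing, assuming socket shutdown"
            break
        print "Received something: {}".format(data)

if __name__ == '__main__':
    sock = socket.socket()
    sock.connect(('127.0.0.1', 4242))
    reader = threading.Thread(target=read_until_closed, args=(sock,))
    reader.start()
    time.sleep(3)
    sock.shutdown(socket.SHUT_RDWR)
    sock.close()
    reader.join()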
The second option was to use the self-pipe trick (a quick search yields many explanations) as a mechanism to wake up a blocked thread. Essentially we create a second file descriptor (a socket is just a kind of file descriptor to the OS) for signalling cancellation. We then use select as our blocking call, waiting for either incoming data on the socket or a cancellation request on the cancellation file descriptor. See the example code below.
from gevent import monkey
monkey.patch_all()

import os
import select
import socket
import sys
import threading
import time

class SocketReadThread(threading.Thread):
    def __init__(self, socket):
        super(SocketReadThread, self).__init__()
        self._socket = socket
        self._socket.setblocking(0)
        r, w = os.pipe()
        self._cancelpipe_r = os.fdopen(r, 'r')
        self._cancelpipe_w = os.fdopen(w, 'w')

    def run(self):
        connected = True
        read_fds = [self._socket, self._cancelpipe_r]
        while connected:
            print "Calling select"
            read_list, write_list, x_list = select.select(read_fds, [], [])
            print "Select returned"
            if self._cancelpipe_r in read_list:
                print "exiting"
                self._cleanup()
                connected = False
            elif self._socket in read_list:
                print "calling socket.recv"
                data = self._socket.recv(1024)
                if (len(data) < 1):
                    print "received nothing, assuming socket shutdown"
                    connected = False
                    self._cleanup()
                else:
                    print "Received something: {}".format(data)

    def stop_reading(self):
        print "writing to pipe"
        self._cancelpipe_w.write("\n")
        self._cancelpipe_w.flush()
        print "joining"
        self.join()
        print "joined"

    def _cleanup(self):
        self._cancelpipe_r.close()
        self._cancelpipe_w.close()
        self._socket.shutdown(socket.SHUT_RDWR)
        self._socket.close()

if __name__ == '__main__':
    sock = socket.socket()
    sock.connect(('127.0.0.1', 4242))
    st = SocketReadThread(sock)
    st.start()
    time.sleep(3)
    st.stop_reading()
Again, before running the above program run netcat -lp 4242 & to give it a listening socket to connect to.

Python socket accept blocks - prevents app from quitting

I've written a very simple Python class which waits for connections on a socket. The intention is to stick this class into an existing app and asynchronously send data to connecting clients.
The problem is that when waiting on a socket.accept(), I cannot end my application by pressing Ctrl-C. Neither can I detect when my class goes out of scope and notify it to end.
Ideally the application below should quit after the time.sleep(4) expires. As you can see below, I tried using select, but this also prevents the app from responding to Ctrl-C. If I could detect that the variable 'a' has gone out of scope in the main method, I could set the quitting flag (and reduce the timeout on select to make it responsive).
Any ideas?
thanks
import sys
import socket
import threading
import time
import select

class Server( threading.Thread ):
    def __init__( self, i_port ):
        threading.Thread.__init__( self )
        self.quitting = False
        self.serversocket = socket.socket( socket.AF_INET, socket.SOCK_STREAM )
        self.serversocket.bind( (socket.gethostname(), i_port ) )
        self.serversocket.listen(5)
        self.start()

    def run( self ):
        # Wait for connection
        while not self.quitting:
            rr,rw,err = select.select( [self.serversocket],[],[], 20 )
            if rr:
                (clientsocket, address) = self.serversocket.accept()
                clientsocket.close()

def main():
    a = Server( 6543 )
    time.sleep(4)

if __name__=='__main__':
    main()
Add self.setDaemon(True) to the __init__ before self.start().
(In Python 2.6 and later, self.daemon = True is preferred).
The key idea is explained here:
"The entire Python program exits when no alive non-daemon threads are left."
So, you need to make "daemons" of those threads that should not keep the whole process alive just by being alive themselves. The main thread is always non-daemon, by the way.
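As a minimal illustration of that rule (separate from the question's Server class), a daemon thread does not keep the process alive once the main thread returns:

import threading
import time

def wait_forever():
    while True:
        time.sleep(1)

t = threading.Thread(target=wait_forever)
t.setDaemon(True)   # on Python 2.6+ you can write t.daemon = True instead
t.start()

time.sleep(4)
# The main thread ends here; because the worker is a daemon, the whole
# process exits instead of hanging on the still-running thread.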
I don't recommend the setDaemon feature for normal shutdown. It's sloppy; instead of having a clean shutdown path for threads, it simply kills the thread with no chance for cleanup. It's good to set it, so your program doesn't get stuck if the main thread exits unexpectedly, but it's not a good normal shutdown path except for quick hacks.
import sys, os, socket, threading, time, select

class Server(threading.Thread):
    def __init__(self, i_port):
        threading.Thread.__init__(self)
        self.setDaemon(True)
        self.quitting = False
        self.serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.serversocket.bind((socket.gethostname(), i_port))
        self.serversocket.listen(5)
        self.start()

    def shutdown(self):
        if self.quitting:
            return
        self.quitting = True
        self.join()

    def run(self):
        # Wait for connection
        while not self.quitting:
            rr,rw,err = select.select([self.serversocket],[],[], 1)
            print rr
            if rr:
                (clientsocket, address) = self.serversocket.accept()
                clientsocket.close()

        print "shutting down"
        self.serversocket.close()

def main():
    a = Server(6543)
    try:
        time.sleep(4)
    finally:
        a.shutdown()

if __name__=='__main__':
    main()
Note that this will delay for up to a second after calling shutdown(), which is poor behavior. This is normally easy to fix: create a wakeup pipe() that you can write to, and include it in the select; but although this is very basic, I couldn't find any way to do this in Python (os.pipe() returns file descriptors, not file objects that we can write to). I haven't dug deeper, since it's tangential to the question.
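For what it's worth, here is a rough, untested sketch of that wakeup-pipe idea using the raw file descriptors from os.pipe() together with os.write(), so no file object is needed:

import os, select, socket, threading, time

class Server(threading.Thread):
    def __init__(self, i_port):
        threading.Thread.__init__(self)
        self.quitting = False
        # Pipe used only to wake up select() immediately on shutdown.
        self._wake_r, self._wake_w = os.pipe()
        self.serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.serversocket.bind((socket.gethostname(), i_port))
        self.serversocket.listen(5)
        self.start()

    def shutdown(self):
        if self.quitting:
            return
        self.quitting = True
        os.write(self._wake_w, b"x")   # wakes the select() right away
        self.join()

    def run(self):
        while not self.quitting:
            rr, rw, err = select.select([self.serversocket, self._wake_r], [], [])
            if self._wake_r in rr:
                break
            if self.serversocket in rr:
                (clientsocket, address) = self.serversocket.accept()
                clientsocket.close()
        self.serversocket.close()
        os.close(self._wake_r)
        os.close(self._wake_w)

def main():
    a = Server(6543)
    try:
        time.sleep(4)
    finally:
        a.shutdown()

if __name__=='__main__':
    main()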

How to stop BaseHTTPServer.serve_forever() in a BaseHTTPRequestHandler subclass?

I am running my HTTPServer in a separate thread (using the threading module which has no way to stop threads...) and want to stop serving requests when the main thread also shuts down.
The Python documentation states that BaseHTTPServer.HTTPServer is a subclass of SocketServer.TCPServer, which supports a shutdown method, but it is missing in HTTPServer.
The whole BaseHTTPServer module has very little documentation :(
Another way to do it, based on http://docs.python.org/2/library/basehttpserver.html#more-examples, is: instead of serve_forever(), keep serving as long as a condition is met, with the server checking the condition before and after each request. For example:
import CGIHTTPServer
import BaseHTTPServer

KEEP_RUNNING = True

def keep_running():
    return KEEP_RUNNING

class Handler(CGIHTTPServer.CGIHTTPRequestHandler):
    cgi_directories = ["/cgi-bin"]

httpd = BaseHTTPServer.HTTPServer(("", 8000), Handler)

while keep_running():
    httpd.handle_request()
I should start by saying that "I probably wouldn't do this myself, but I have in the past". The serve_forever (from SocketServer.py) method looks like this:
def serve_forever(self):
    """Handle one request at a time until doomsday."""
    while 1:
        self.handle_request()
You could replace (in subclass) while 1 with while self.should_be_running, and modify that value from a different thread. Something like:
def stop_serving_forever(self):
    """Stop handling requests"""
    self.should_be_running = 0
    # Make a fake request to the server, to really force it to stop.
    # Otherwise it will just stop on the next request.
    # (Exercise for the reader.)
    self.make_a_fake_request_to_myself()
Edit: I dug up the actual code I used at the time:
import SimpleXMLRPCServer
import xmlrpclib

class StoppableRPCServer(SimpleXMLRPCServer.SimpleXMLRPCServer):

    stopped = False
    allow_reuse_address = True

    def __init__(self, *args, **kw):
        SimpleXMLRPCServer.SimpleXMLRPCServer.__init__(self, *args, **kw)
        self.register_function(lambda: 'OK', 'ping')

    def serve_forever(self):
        while not self.stopped:
            self.handle_request()

    def force_stop(self):
        self.server_close()
        self.stopped = True
        self.create_dummy_request()

    def create_dummy_request(self):
        server = xmlrpclib.Server('http://%s:%s' % self.server_address)
        server.ping()
The event loop ends on SIGTERM, Ctrl+C, or when shutdown() is called.
server_close() must be called after serve_forever() to close the listening socket.
import http.server

class StoppableHTTPServer(http.server.HTTPServer):
    def run(self):
        try:
            self.serve_forever()
        except KeyboardInterrupt:
            pass
        finally:
            # Clean-up server (close socket, etc.)
            self.server_close()
Simple server stoppable with user action (SIGTERM, Ctrl+C, ...):
server = StoppableHTTPServer(("127.0.0.1", 8080),
                             http.server.BaseHTTPRequestHandler)
server.run()
Server running in a thread:
import threading

server = StoppableHTTPServer(("127.0.0.1", 8080),
                             http.server.BaseHTTPRequestHandler)

# Start processing requests
thread = threading.Thread(None, server.run)
thread.start()

# ... do things ...

# Shutdown server
server.shutdown()
thread.join()
In my Python 2.6 installation, I can call shutdown on the underlying TCPServer - it is still there inside your HTTPServer:
TCPServer.shutdown
>>> import BaseHTTPServer
>>> h=BaseHTTPServer.HTTPServer(('',5555), BaseHTTPServer.BaseHTTPRequestHandler)
>>> h.shutdown
<bound method HTTPServer.shutdown of <BaseHTTPServer.HTTPServer instance at 0x0100D800>>
>>>
I think you can use [serverName].socket.close()
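A rough, untested sketch of how that hack might look: closing the listening socket out from under the serving loop makes the blocking call fail, so the serving thread catches the resulting error and treats it as shutdown:

import BaseHTTPServer
import socket
import threading

httpd = BaseHTTPServer.HTTPServer(('', 5555), BaseHTTPServer.BaseHTTPRequestHandler)

def serve():
    try:
        httpd.serve_forever()
    except socket.error:
        # The listening socket was closed from the main thread.
        print "server loop ended"

thread = threading.Thread(target=serve)
thread.start()

# ... later, from the main thread:
httpd.socket.close()
thread.join()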
In python 2.7, calling shutdown() works but only if you are serving via serve_forever, because it uses async select and a polling loop. Running your own loop with handle_request() ironically excludes this functionality because it implies a dumb blocking call.
From SocketServer.py's BaseServer:
def serve_forever(self, poll_interval=0.5):
    """Handle one request at a time until shutdown.

    Polls for shutdown every poll_interval seconds. Ignores
    self.timeout. If you need to do periodic tasks, do them in
    another thread.
    """
    self.__is_shut_down.clear()
    try:
        while not self.__shutdown_request:
            # XXX: Consider using another file descriptor or
            # connecting to the socket to wake this up instead of
            # polling. Polling reduces our responsiveness to a
            # shutdown request and wastes cpu at all other times.
            r, w, e = select.select([self], [], [], poll_interval)
            if self in r:
                self._handle_request_noblock()
    finally:
        self.__shutdown_request = False
        self.__is_shut_down.set()
Here's part of my code for doing a blocking shutdown from another thread, using an event to wait for completion:
class MockWebServerFixture(object):
    def start_webserver(self):
        """
        start the web server on a new thread
        """
        self._webserver_died = threading.Event()
        self._webserver_thread = threading.Thread(
            target=self._run_webserver_thread)
        self._webserver_thread.start()

    def _run_webserver_thread(self):
        self.webserver.serve_forever()
        self._webserver_died.set()

    def _kill_webserver(self):
        if not self._webserver_thread:
            return

        self.webserver.shutdown()

        # wait for thread to die for a bit, then give up raising an exception.
        if not self._webserver_died.wait(5):
            raise ValueError("couldn't kill webserver")
This is a simplified version of Helgi's answer for python 3.7:
import threading
import time
from http.server import ThreadingHTTPServer, SimpleHTTPRequestHandler

class MyServer(threading.Thread):
    def run(self):
        self.server = ThreadingHTTPServer(('localhost', 8000), SimpleHTTPRequestHandler)
        self.server.serve_forever()

    def stop(self):
        self.server.shutdown()

if __name__ == '__main__':
    s = MyServer()
    s.start()
    print('thread alive:', s.is_alive())  # True
    time.sleep(2)
    s.stop()
    print('thread alive:', s.is_alive())  # False
This is a method I use successfully (Python 3) to stop the server from the web application itself (via a web page):
import http.server
import os
import re

class PatientHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
    stop_server = False
    base_directory = "/static/"
    # A file to use as a "server stopped user information" page.
    stop_command = "/control/stop.html"

    def send_head(self):
        self.path = os.path.normpath(self.path)
        if self.path == PatientHTTPRequestHandler.stop_command and self.address_string() == "127.0.0.1":
            # I wanted only the local machine to be able to stop the server.
            PatientHTTPRequestHandler.stop_server = True
            # Allow the stop page to be displayed.
            return http.server.SimpleHTTPRequestHandler.send_head(self)

        if self.path.startswith(PatientHTTPRequestHandler.base_directory):
            return http.server.SimpleHTTPRequestHandler.send_head(self)
        else:
            return self.send_error(404, "Not allowed", "The path you requested is forbidden.")

if __name__ == "__main__":
    httpd = http.server.HTTPServer(("127.0.0.1", 8080), PatientHTTPRequestHandler)
    # A timeout is needed for the server to check periodically for KeyboardInterrupt
    httpd.timeout = 1
    while not PatientHTTPRequestHandler.stop_server:
        httpd.handle_request()
This way, pages served via the base address http://localhost:8080/static/ (for example http://localhost:8080/static/styles/common.css) will be served by the default handler, an access to http://localhost:8080/control/stop.html from the server's own machine will display stop.html and then stop the server, and any other path will be forbidden.
I tried all of the above possible solutions and ended up with a "sometimes" issue - somehow it did not really do it - so I ended up making a dirty solution that worked all the time for me:
If all of the above fails, brute-force kill your thread using something like this:
import subprocess
cmdkill = "kill $(ps aux|grep '<name of your thread> true'|grep -v 'grep'|awk '{print $2}') 2> /dev/null"
subprocess.Popen(cmdkill, stdout=subprocess.PIPE, shell=True)
import http.server
import socketserver
import socket as sck
import os
import threading

class myserver:
    def __init__(self, PORT, LOCATION):
        self.thrd = threading.Thread(None, self.run)
        self.Directory = LOCATION
        self.Port = PORT
        hostname = sck.gethostname()
        ip_address = sck.gethostbyname(hostname)
        self.url = 'http://' + ip_address + ':' + str(self.Port)
        Handler = http.server.SimpleHTTPRequestHandler
        self.httpd = socketserver.TCPServer(("", PORT), Handler)
        print('Object created, use the start() method to launch the server')

    def run(self):
        print('listening on: ' + self.url)
        os.chdir(self.Directory)
        print('myserver object started')
        print("Use the object's stop() method to stop the server")
        self.httpd.serve_forever()
        print('Quit handling')
        print('Server stopped')
        print('Port ' + str(self.Port) + ' should be available again.')

    def stop(self):
        print('Stopping server')
        self.httpd.shutdown()
        self.httpd.server_close()
        print('Need just one more request before shutting down')

    def start(self):
        self.thrd.start()

def help():
    helpmsg = '''Create a new server object by initialising
NewServer = webserver3.myserver(Port_number, Directory_String)
Then start it using the NewServer.start() function
Stop it using NewServer.stop()'''
    print(helpmsg)
I'm not an experienced Python programmer, I just want to share my comprehensive solution, mostly based on snippets from here and there. I usually import this script in my console, and it lets me set up multiple servers for different locations using their specific ports, sharing my content with other devices on the network.
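For example, a hypothetical interactive session using the class above (the port and directory are just placeholders, and the code is assumed to be saved as webserver3.py as in the help text):

from webserver3 import myserver

srv = myserver(8000, '/path/to/share')   # prints the start() hint
srv.start()                              # serves in a background thread
# ... browse http://<your-ip>:8000 from another device ...
srv.stop()                               # shuts the server down, frees the port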
Here's a context-flavored version for Python 3.7+ which I prefer because it cleans up automatically and you can specify the directory to serve:
from contextlib import contextmanager
from functools import partial
from http.server import SimpleHTTPRequestHandler, ThreadingHTTPServer
from threading import Thread

@contextmanager
def http_server(host: str, port: int, directory: str):
    server = ThreadingHTTPServer(
        (host, port), partial(SimpleHTTPRequestHandler, directory=directory)
    )
    server_thread = Thread(target=server.serve_forever, name="http_server")
    server_thread.start()

    try:
        yield
    finally:
        server.shutdown()
        server_thread.join()

def usage_example():
    import time

    with http_server("127.0.0.1", 8087, "."):
        # now you can use the web server
        time.sleep(100)
