Instance variable unchanged after updating from multiple processes - Python

For the following code, I expect the output of dr.hello to be 10, since 10 spawned processes each call updateHello once to increment dr.hello. But it is instead 0. What is the reason, and how do I fix it?
from multiprocessing import Process, Lock

class myWorker:
    def __init__(self, lock, driver, i):
        self.idx = i
        self.driver = driver
        self.lock = lock

    def run(self):
        self.driver.updateHello(self.lock, self.idx)

class driver:
    hello = 0

    def __init__(self):
        self.lock = Lock()

    def workerrun(self, lock, i):
        worker1 = myWorker(lock, self, i)
        worker1.run()

    def run(self):
        D = [Process(target=self.workerrun, args=(self.lock, i)) for i in range(10)]
        for d in D:
            d.start()
        for d in D:
            d.join()

    def updateHello(self, l, i):
        l.acquire()
        self.hello += 1
        print "update from", i
        l.release()

if __name__ == '__main__':
    dr = driver()
    dr.run()
    print dr.hello

I think you need a multiprocessing.Value for a shared variable:
from multiprocessing import Process, Lock, Value

class myWorker:
    def __init__(self, lock, driver, i):
        self.idx = i
        self.driver = driver
        self.lock = lock

    def run(self):
        self.driver.updateHello(self.lock, self.idx)

class driver:
    hello = Value("i", lock=True)  # create shared variable of type int
    hello.value = 0

    def __init__(self):
        self.lock = Lock()

    def workerrun(self, lock, i):
        worker1 = myWorker(lock, self, i)
        worker1.run()

    def run(self):
        D = [Process(target=self.workerrun, args=(self.lock, i)) for i in range(10)]
        for d in D:
            d.start()
        for d in D:
            d.join()

    def updateHello(self, l, i):
        with self.lock:  # acquire lock
            driver.hello.value += 1
            print("update from", i)
        # lock is released automatically at the end of the with-block

if __name__ == '__main__':
    dr = driver()
    dr.run()
    print(driver.hello.value)
IPython output:
('update from', 0)
('update from', 1)
('update from', 2)
('update from', 5)
('update from', 6)
('update from', 7)
('update from', 4)
('update from', 3)
('update from', 8)
('update from', 9)
10

This works now; I did it by changing Padraic's solution from a class variable to an instance variable:
from multiprocessing import Process, Lock, Value

class myWorker:
    def __init__(self, lock, driver, i):
        self.idx = i
        self.driver = driver
        self.lock = lock

    def run(self):
        self.driver.updateHello(self.lock, self.idx)

class driver(object):
    # hello = Value("i", lock=True)  # was: shared int as a class variable
    # hello.value = 0

    def __init__(self):
        self.lock = Lock()
        self.hello = Value("i", 0)  # shared int as an instance attribute

    def workerrun(self, lock, i):
        worker1 = myWorker(lock, self, i)
        worker1.run()

    def run(self):
        D = [Process(target=self.workerrun, args=(self.lock, i)) for i in range(10)]
        for d in D:
            d.start()
        for d in D:
            d.join()

    def updateHello(self, l, i):
        with self.lock:  # acquire lock
            self.hello.value += 1
            print("update from", i)
        # lock is released automatically at the end of the with-block

if __name__ == '__main__':
    dr = driver()
    dr.run()
    print(dr.hello.value)
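As a side note, a Value created like this already carries its own lock, so the separate Lock object isn't strictly necessary. A minimal sketch of the same counter using only the Value's built-in get_lock() (my own illustration, not part of the answers above):

from multiprocessing import Process, Value

def updateHello(hello, i):
    with hello.get_lock():  # get_lock() returns the lock the Value was created with
        hello.value += 1
        print("update from", i)

if __name__ == '__main__':
    hello = Value("i", 0)
    procs = [Process(target=updateHello, args=(hello, i)) for i in range(10)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(hello.value)  # expected: 10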

Related

Write data to multiple files using multiple threads

I want to create multiple files using multiple threads, and append data (after some operation is performed) to the corresponding file from its respective thread.
I tried it, but the data gets mixed up between threads and the correct data is not written to the respective files.
import threading
import time

exitFlag = 0

class myThread (threading.Thread):
    def __init__(self, threadID, name, counter):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter

    def run(self):
        # note: uses the global count, not self.threadID
        with open('file_' + str(count) + '_logs.txt', 'a+') as result:
            result.write("Starting " + self.name)
            result.write("Exiting " + self.name)
        print ("Starting " + self.name)
        print_time(self.name, self.counter, 5)
        print ("Exiting " + self.name)

def print_time(threadName, delay, counter):
    while counter:
        if exitFlag:
            threadName.exit()
        time.sleep(delay)
        print ("%s: %s" % (threadName, time.ctime(time.time())))
        counter -= 1

myList = ['string0', 'string1', 'string2', 'string3']

if __name__ == "__main__":
    count = 0
    for data in myList:
        count += 1
        mythread = myThread(count, "Thread-" + str(count), count)
        mythread.start()
        mythread.join()
I expect 4 files to be created by 4 threads, with data from thread 1 written to file_1_logs.txt, and so on.
But while writing, sometimes all the data is written to a single file.
How do I write this data to the files correctly?
Don't use higher-scope or global variables in threads. Every variable (that you want to modify) must be local to the thread.
That means you need to pass the initial values of everything to the Thread constructor. The same is true for functions like your print_time: either everything the function needs to do its job is passed in via arguments, or you turn it into a method of the class.
Consider the following changes. Note how MyThread is completely self-contained:
from threading import Thread
from time import sleep
from datetime import datetime

class MyThread(Thread):
    def __init__(self, threadID, name, delay, data):
        Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.data = data
        self.delay = delay
        self.logfile = None

    def run(self):
        with open('file_%s_logs.txt' % self.threadID, 'a+') as logfile:
            self.logfile = logfile
            self.log("Starting")
            self.print_time(5)
            self.log("Exiting")
        self.logfile = None

    def print_time(self, repeat):
        for c in range(repeat):
            sleep(self.delay)
            self.log(self.data)

    def log(self, message):
        now = datetime.now().isoformat()
        formatted_line = "%s:%s:%s" % (now, self.name, message)
        print(formatted_line)
        if self.logfile:
            self.logfile.write(formatted_line + '\n')

if __name__ == "__main__":
    myList = ['string0', 'string1', 'string2', 'string3']
    threads = []
    # spawn threads
    for idx, data in enumerate(myList):
        thread = MyThread(idx, "Thread-%s" % idx, idx, data)
        threads.append(thread)
        thread.start()
    # wait for threads to finish
    while True:
        if any(thread.is_alive() for thread in threads):
            sleep(0.1)
        else:
            print("All done.")
            break
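One design note: the original question started and joined each thread inside the same loop iteration, which runs the threads one at a time. Starting them all first and only then waiting keeps them concurrent; the polling loop above does that, and joining each thread works just as well. A minimal standalone sketch of the join() variant (illustrative, not the poster's code):

from threading import Thread
from time import sleep

def work(n):
    sleep(0.1)
    print("worker %s done" % n)

if __name__ == "__main__":
    threads = [Thread(target=work, args=(i,)) for i in range(4)]
    for t in threads:
        t.start()   # start them all first...
    for t in threads:
        t.join()    # ...then block until each one has finished
    print("All done.")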

Pass object to two threads

I have 3 threads. Thread 1 collects data and returns it:
var1 = Thread1.start()
Thread 2 and thread 3 use this variable var1 to do their routine.
I'm not sure if I'm doing it right, because sometimes var1 is returned and it's not an empty list; I store it in a variable of each thread and use a list comprehension to extract the data. In the debug logs I see elements that must be in thread 3, but the debug logger of that thread returns nothing.
The algorithm in thread 2 and thread 3:
def __init__(self):
    self.lock = threading.RLock()

def do_smth2(self, var1):
    self.lock.acquire()
    var1_2 = var1
    self.lock.release()

def do_smth3(self, var3):
    self.lock.acquire()
    var1_3 = var1
    self.lock.release()
In main:
object = thread1.start()
thread2.start(object)
thread3.start(object)
Thread 2 and thread 3 run at the same time, and I use time.sleep(3) when var1_3 or var1_2 is None (they are of list type) or when len(var1_3) <= 0. A minimal sketch of this hand-off appears after the code below.
EDIT
class Application:
    def __init__(self):
        self.logger = RootLogger()
        self.logger.set_config(__name__, sys_log)
        self.adapter = Adapter()
        self.transit_listener = TransitListener()

    def run(self):
        # start listeners
        transits_list = self.transit_listener.start()
        self.adapter.start(transits_list)
        # start REST service
        RestWebService().run()
Thread 1
class TransitListener:
    def __init__(self):
        self.interval = session_interval

    def _transits_data(self):
        # while polling is running, change interval after 1st cycle
        while datetime.now() >= session_interval:
            result = self.connector.query(self.statement,
                                          fetch=True)
            self.logger.debug(result)
            # store result
            self.transits_queue.put(result)
            self.logger.debug(self.interval)
            time.sleep(5)
            # change interval
            self._interval_granularity()
            self.logger.debug(self.interval)

    def start(self):
        self.worker = Thread(target=self._transits_data)
        self._configure()
        self.logger.info("Starting thread 'transits listener'...")
        try:
            self.worker.start()
            if self.worker.is_alive():
                self.logger.info("Thread 'transits listener' started")
                # return result from queue
                return self.transits_queue.get()
Thread 2 and 3
class Adapter:
    def __init__(self):
        self.logger = RootLogger()
        self.logger.set_config(name=__name__, logfile=epp_log)
        self.lock = RLock()
        self.threads = []

    def _session_start(self, transits):
        while datetime.now() >= session_interval:
            self.lock.acquire()
            transit_list = transits
            self.lock.release()
            self.logger.debug(f"ENTRIES {transit_list}")

    def _session_stop(self, transits):
        while datetime.now() >= session_interval:
            self.lock.acquire()
            transit_list = transits
            self.lock.release()
            self.logger.debug(f"EXITS {transit_list}")

    def start(self, transits):
        # prepare SQL tables
        # define priority of threads
        # 1st
        session_start_thread = Thread(target=self._session_start, args=(transits,))
        self.threads.append(session_start_thread)
        # 2nd
        session_stop_thread = Thread(target=self._session_stop, args=(transits,))
        self.threads.append(session_stop_thread)
        self.threads[0].start()
        self.threads[1].start()
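For reference, a minimal sketch of the hand-off being described: one producer thread delivering its result to two consumer threads through per-consumer queues, so the consumers don't steal each other's data (illustrative names, not the poster's code):

import queue
import threading

def producer(queues):
    var1 = ["transit-1", "transit-2"]  # stands in for the collected data
    for q in queues:
        q.put(var1)  # deliver a reference to every consumer

def consumer(name, q):
    var1 = q.get()  # blocks until the producer delivers
    print(f"{name} got {var1}")

if __name__ == "__main__":
    q2, q3 = queue.Queue(), queue.Queue()
    threads = [
        threading.Thread(target=producer, args=([q2, q3],)),
        threading.Thread(target=consumer, args=("thread2", q2)),
        threading.Thread(target=consumer, args=("thread3", q3)),
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()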

Modifying and accessing variables of a different class by a thread in Python

I am very new to Python, so what I'm asking may not be correct. What I'm looking to do is: create a thread from mainss and start it. When the thread is started, I want it to access a variable of the mainss class from which the thread was created and modify that variable's value. And I want the execution of mainss to sleep until the thread modifies one of its variable values. How can I achieve this? Here is the code I tried; the comment in myThread.py marks where I need to modify the value of the count variable of the mainss class.
main.py
#!/usr/bin/python
import time
from myThread import myThread

class mainss():
    def __init__(self):
        print "s"

    def callThread(self):
        global count
        count = 1
        # Create new threads
        thread1 = myThread(1, "Thread-1", 1, count)
        thread1.start()
        # time.sleep(10) until count value is changed by thread to 3
        print "Changed Count value%s " % count
        print "Exiting"

m = mainss()
m.callThread()
myThread.py
#!/usr/bin/python
import threading
import time

exitFlag = 0

class myThread (threading.Thread):
    def __init__(self, threadID, name, counter, count):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter
        self.count = count

    def run(self):
        print_time(self.name, 1, 5, self.count)

def print_time(threadName, delay, counter, count):
    from main import mainss
    while counter:
        if exitFlag:
            threadName.exit()
        time.sleep(delay)
        count = count + 1
        print "count %s" % (count)
        # here I want to modify count of the mainss class
        counter -= 1
Thanks in advance
With multiprocessing, a Manager dictionary can be used to communicate between or to processes: https://pymotw.com/3/multiprocessing/communication.html#managing-shared-state. Note that the Manager dictionary can be changed while the process is running. Multiprocessing also has a wait feature: https://pymotw.com/3/multiprocessing/communication.html#signaling-between-processes
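A minimal sketch of that approach, using a Manager dictionary for the shared count and an Event for the waiting part (my own illustration based on the linked pages, not from the poster's code):

import multiprocessing

def worker(shared, done):
    shared['count'] += 2  # modify the managed dict from the child process
    done.set()            # signal the waiting parent

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    shared = manager.dict(count=1)
    done = multiprocessing.Event()
    p = multiprocessing.Process(target=worker, args=(shared, done))
    p.start()
    done.wait()  # sleep here until the child has changed the value
    print("Changed Count value %s" % shared['count'])  # -> 3
    p.join()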
I would use a threading.Event and a Queue.
Something like this (please note that I didn't test this myself, and you will obviously have to make some changes):
main.py
import Queue
import threading
from myThread import myThread

class mainss:
    def __init__(self):
        self.queue = Queue.Queue()
        self.event = threading.Event()

    def callThread(self):
        self.queue.put(1)  # Put a value in the queue
        t = myThread(self.queue, self.event)
        t.start()
        self.event.wait()  # Wait for the value to update
        count = self.queue.get()
        print "Changed Count value %s" % count

if __name__ == '__main__':
    m = mainss()
    m.callThread()
myThread.py
import threading

class myThread(threading.Thread):
    def __init__(self, queue, event):
        super(myThread, self).__init__()
        self.queue = queue
        self.event = event

    def run(self):
        while True:
            count = self.queue.get()  # Get the value (1)
            count += 1
            print "count %s" % (count)
            self.queue.put(count)  # Put updated value
            self.event.set()  # Notify main thread
            break

Python: multiprocessing and Array of c_char_p

I'm launching 3 processes and I want them to put a string into a shared array, at the index corresponding to the process (i).
Look at the code below; the generated output is:
['test 0', None, None]
['test 1', 'test 1', None]
['test 2', 'test 2', 'test 2']
Why does 'test 0' get overwritten by 'test 1', and 'test 1' by 'test 2'?
What I want is (order is not important):
['test 0', None, None]
['test 0', 'test 1', None]
['test 0', 'test 1', 'test 2']
The code :
#!/usr/bin/env python
import multiprocessing
from multiprocessing import Value, Lock, Process, Array
import ctypes
from ctypes import c_int, c_char_p
class Consumer(multiprocessing.Process):
def __init__(self, task_queue, result_queue, arr, lock):
multiprocessing.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
self.arr = arr
self.lock = lock
def run(self):
proc_name = self.name
while True:
next_task = self.task_queue.get()
if next_task is None:
self.task_queue.task_done()
break
answer = next_task(arr=self.arr, lock=self.lock)
self.task_queue.task_done()
self.result_queue.put(answer)
return
class Task(object):
def __init__(self, i):
self.i = i
def __call__(self, arr=None, lock=None):
with lock:
arr[self.i] = "test %d" % self.i
print arr[:]
def __str__(self):
return 'ARC'
def run(self):
print 'IN'
if __name__ == '__main__':
tasks = multiprocessing.JoinableQueue()
results = multiprocessing.Queue()
arr = Array(ctypes.c_char_p, 3)
lock = multiprocessing.Lock()
num_consumers = multiprocessing.cpu_count() * 2
consumers = [Consumer(tasks, results, arr, lock) for i in xrange(num_consumers)]
for w in consumers:
w.start()
for i in xrange(3):
tasks.put(Task(i))
for i in xrange(num_consumers):
tasks.put(None)
I'm running Python 2.7.3 (Ubuntu)
This problem seems similar to this one. There, J.F. Sebastian speculated that the assignment to arr[i] points arr[i] to a memory address that is only meaningful to the subprocess making the assignment. The other subprocesses retrieve garbage when looking at that address.
There are at least two ways to avoid this problem. One is to use a multiprocessing Manager list:
import multiprocessing as mp

class Consumer(mp.Process):
    def __init__(self, task_queue, result_queue, lock, lst):
        mp.Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.lock = lock
        self.lst = lst

    def run(self):
        proc_name = self.name
        while True:
            next_task = self.task_queue.get()
            if next_task is None:
                self.task_queue.task_done()
                break
            answer = next_task(lock=self.lock, lst=self.lst)
            self.task_queue.task_done()
            self.result_queue.put(answer)
        return

class Task(object):
    def __init__(self, i):
        self.i = i

    def __call__(self, lock, lst):
        with lock:
            lst[self.i] = "test {}".format(self.i)
            print([lst[i] for i in range(3)])

if __name__ == '__main__':
    tasks = mp.JoinableQueue()
    results = mp.Queue()
    manager = mp.Manager()
    lst = manager.list([''] * 3)
    lock = mp.Lock()
    num_consumers = mp.cpu_count() * 2
    consumers = [Consumer(tasks, results, lock, lst) for i in xrange(num_consumers)]
    for w in consumers:
        w.start()
    for i in xrange(3):
        tasks.put(Task(i))
    for i in xrange(num_consumers):
        tasks.put(None)
    tasks.join()
Another way is to use a shared array with a fixed size such as mp.Array('c', 10).
import multiprocessing as mp

class Consumer(mp.Process):
    def __init__(self, task_queue, result_queue, arr, lock):
        mp.Process.__init__(self)
        self.task_queue = task_queue
        self.result_queue = result_queue
        self.arr = arr
        self.lock = lock

    def run(self):
        proc_name = self.name
        while True:
            next_task = self.task_queue.get()
            if next_task is None:
                self.task_queue.task_done()
                break
            answer = next_task(arr=self.arr, lock=self.lock)
            self.task_queue.task_done()
            self.result_queue.put(answer)
        return

class Task(object):
    def __init__(self, i):
        self.i = i

    def __call__(self, arr, lock):
        with lock:
            arr[self.i].value = "test {}".format(self.i)
            print([a.value for a in arr])

if __name__ == '__main__':
    tasks = mp.JoinableQueue()
    results = mp.Queue()
    arr = [mp.Array('c', 10) for i in range(3)]
    lock = mp.Lock()
    num_consumers = mp.cpu_count() * 2
    consumers = [Consumer(tasks, results, arr, lock) for i in xrange(num_consumers)]
    for w in consumers:
        w.start()
    for i in xrange(3):
        tasks.put(Task(i))
    for i in xrange(num_consumers):
        tasks.put(None)
    tasks.join()
I speculate that the reason this works when mp.Array(ctypes.c_char_p, 3) does not is that mp.Array('c', 10) has a fixed size, so the memory address never changes, while mp.Array(ctypes.c_char_p, 3) stores variable-sized strings, so the memory address might change when arr[i] is assigned a bigger string.
Perhaps this is what the docs are warning about when they state:
Although it is possible to store a pointer in shared memory remember that this will refer to a location in the address space of a specific process. However, the pointer is quite likely to be invalid in the context of a second process and trying to dereference the pointer from the second process may cause a crash.
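A small single-process illustration of that speculation (my own, not from the original answer): an Array of c_char_p only holds pointer-sized slots, while Array('c', n) holds the character bytes themselves inline in shared memory:

import ctypes
import multiprocessing as mp

ptr_arr = mp.Array(ctypes.c_char_p, 3)  # three pointer-sized slots
byte_arr = mp.Array('c', 10)            # ten bytes of inline storage

print(ctypes.sizeof(ctypes.c_char_p))   # size of one pointer slot, e.g. 8
byte_arr.value = 'inline'               # the bytes are copied into shared memory
print(byte_arr.value)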

Python multiprocessing RemoteManager under a multiprocessing.Process

I'm trying to start a data queue server under a managing process (so that it can later be turned into a service), and while the data queue server function works fine in the main process, it does not work in a process created using multiprocessing.Process.
The dataQueueServer and dataQueueClient code is based on the code from the multiprocessing module documentation here.
When run on its own, dataQueueServer works well. However, when started via a multiprocessing.Process's start() in mpqueue, it doesn't work (when tested with the client). I am using dataQueueClient without changes to test both cases.
The code does reach the serve_forever in both cases, so I think the server is working, but something is blocking it from communicating back to the client in the mpqueue case.
I have placed the loop that runs the serve_forever() part under a thread, so that it can be stoppable.
Here is the code:
mpqueue # this is the "manager" process trying to spawn the server in a child process
import time
import multiprocessing
import threading
import dataQueueServer

class Printer():
    def __init__(self):
        self.lock = threading.Lock()

    def tsprint(self, text):
        with self.lock:
            print text

class QueueServer(multiprocessing.Process):
    def __init__(self, name='', printer=None):
        multiprocessing.Process.__init__(self)
        self.name = name
        self.printer = printer
        self.ml = dataQueueServer.MainLoop(name='ml', printer=self.printer)

    def run(self):
        self.printer.tsprint(self.ml)
        self.ml.start()

    def stop(self):
        self.ml.stop()

if __name__ == '__main__':
    printer = Printer()
    qs = QueueServer(name='QueueServer', printer=printer)
    printer.tsprint(qs)
    printer.tsprint('starting')
    qs.start()
    printer.tsprint('started.')
    printer.tsprint('Press Ctrl-C to quit')
    try:
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        printer.tsprint('\nTrying to exit cleanly...')
        qs.stop()
        printer.tsprint('stopped')
dataQueueServer
import time
import threading
from multiprocessing.managers import BaseManager
from multiprocessing import Queue

HOST = ''
PORT = 50010
AUTHKEY = 'authkey'

## Define some helper functions for use by the main process loop
class Printer():
    def __init__(self):
        self.lock = threading.Lock()

    def tsprint(self, text):
        with self.lock:
            print text

class QueueManager(BaseManager):
    pass

class MainLoop(threading.Thread):
    """A thread based loop manager, allowing termination signals to be sent
    to the thread"""

    def __init__(self, name='', printer=None):
        threading.Thread.__init__(self)
        self._stopEvent = threading.Event()
        self.daemon = True
        self.name = name
        if printer is None:
            self.printer = Printer()
        else:
            self.printer = printer
        ## create the queue
        self.queue = Queue()
        ## Add a function to the handler to return the queue to clients
        self.QM = QueueManager
        self.QM.register('get_queue', callable=lambda: self.queue)
        self.queue_manager = self.QM(address=(HOST, PORT), authkey=AUTHKEY)
        self.queue_server = self.queue_manager.get_server()

    def __del__(self):
        self.printer.tsprint('closing...')

    def run(self):
        self.printer.tsprint('{}: started serving'.format(self.name))
        self.queue_server.serve_forever()

    def stop(self):
        self.printer.tsprint('{}: stopping'.format(self.name))
        self._stopEvent.set()

    def stopped(self):
        return self._stopEvent.isSet()

def start():
    printer = Printer()
    ml = MainLoop(name='ml', printer=printer)
    ml.start()
    return ml

def stop(ml):
    ml.stop()

if __name__ == '__main__':
    ml = start()
    raw_input("\nhit return to stop")
    stop(ml)
And a client:
dataQueueClient
import datetime
from multiprocessing.managers import BaseManager

n = 0
N = 10**n
HOST = ''
PORT = 50010
AUTHKEY = 'authkey'

def now():
    return datetime.datetime.now()

def gen(n, func, *args, **kwargs):
    k = 0
    while k < n:
        yield func(*args, **kwargs)
        k += 1

class QueueManager(BaseManager):
    pass

QueueManager.register('get_queue')
m = QueueManager(address=(HOST, PORT), authkey=AUTHKEY)
m.connect()
queue = m.get_queue()

def load(msg, q):
    return q.put(msg)

def get(q):
    return q.get()

lgen = gen(N, load, msg='hello', q=queue)

t0 = now()
while True:
    try:
        lgen.next()
    except StopIteration:
        break
t1 = now()
print 'loaded %d items in ' % N, t1 - t0

t0 = now()
while queue.qsize() > 0:
    queue.get()
t1 = now()
print 'got %d items in ' % N, t1 - t0
So it seems the solution is simple enough: don't use serve_forever(), and use manager.start() instead.
According to Eli Bendersky, the BaseManager (and its extended version SyncManager) already spawns the server in a new process (and looking at the multiprocessing.managers code confirms this). The problem I have been experiencing stems from the form used in the example, in which the server is started under the main process.
I still don't understand why the current example doesn't work when run under a child process, but that's no longer an issue.
Here's the working (and much simplified from the OP) code to manage multiple queue servers:
Server:
from multiprocessing import Queue
from multiprocessing.managers import SyncManager

HOST = ''
PORT0 = 5011
PORT1 = 5012
PORT2 = 5013
AUTHKEY = 'authkey'
name0 = 'qm0'
name1 = 'qm1'
name2 = 'qm2'
description = 'Queue Server'

def CreateQueueServer(HOST, PORT, AUTHKEY, name=None, description=None):
    q = Queue()

    class QueueManager(SyncManager):
        pass

    QueueManager.register('get_queue', callable=lambda: q)
    QueueManager.register('get_name', callable=lambda: name)                # wrapped in a lambda so the registered callable returns the value
    QueueManager.register('get_description', callable=lambda: description)
    manager = QueueManager(address=(HOST, PORT), authkey=AUTHKEY)
    manager.start()  # This actually starts the server
    return manager

# Start three queue servers
qm0 = CreateQueueServer(HOST, PORT0, AUTHKEY, name0, description)
qm1 = CreateQueueServer(HOST, PORT1, AUTHKEY, name1, description)
qm2 = CreateQueueServer(HOST, PORT2, AUTHKEY, name2, description)

raw_input("return to end")
Client:
from multiprocessing.managers import SyncManager

HOST = ''
PORT0 = 5011
PORT1 = 5012
PORT2 = 5013
AUTHKEY = 'authkey'

def QueueServerClient(HOST, PORT, AUTHKEY):
    class QueueManager(SyncManager):
        pass

    QueueManager.register('get_queue')
    QueueManager.register('get_name')
    QueueManager.register('get_description')
    manager = QueueManager(address=(HOST, PORT), authkey=AUTHKEY)
    manager.connect()  # This starts the connected client
    return manager

# create three connected managers
qc0 = QueueServerClient(HOST, PORT0, AUTHKEY)
qc1 = QueueServerClient(HOST, PORT1, AUTHKEY)
qc2 = QueueServerClient(HOST, PORT2, AUTHKEY)

# Get the queue objects from the clients
q0 = qc0.get_queue()
q1 = qc1.get_queue()
q2 = qc2.get_queue()

# put stuff in the queues
q0.put('some stuff')
q1.put('other stuff')
q2.put({1: 123, 2: 'abc'})

# check their sizes
print 'q0 size', q0.qsize()
print 'q1 size', q1.qsize()
print 'q2 size', q2.qsize()

# pull some stuff and print it
print q0.get()
print q1.get()
print q2.get()
Adding an additional server to share a dictionary with the information of the running queue servers, so that consumers can easily tell what's available where, is easy enough with that model. One thing to note, though, is that the shared dictionary requires slightly different syntax than a normal dictionary: dictionary[0] = something will not work. You need to use the dictionary.update([(key, value), (otherkey, othervalue)]) and dictionary.get(key) syntax, which propagates to all other clients connected to this dictionary.
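For illustration, a minimal sketch of such a directory server (hypothetical names and port, untested): it shares a plain dict through a SyncManager and uses exactly the update()/get() syntax described above:

from multiprocessing.managers import SyncManager

HOST = ''
PORT3 = 5014
AUTHKEY = 'authkey'

directory = {}  # maps queue-server name -> (host, port, description)

class DirectoryManager(SyncManager):
    pass

DirectoryManager.register('get_directory', callable=lambda: directory)

if __name__ == '__main__':
    manager = DirectoryManager(address=(HOST, PORT3), authkey=AUTHKEY)
    manager.start()
    d = manager.get_directory()
    # register the three queue servers from the example above
    d.update([('qm0', (HOST, 5011, 'Queue Server')),
              ('qm1', (HOST, 5012, 'Queue Server')),
              ('qm2', (HOST, 5013, 'Queue Server'))])
    print(d.get('qm0'))  # clients connect() the same way and look servers up here
    manager.shutdown()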
