Tkinter with multiprocessing: OSError [Errno 22] Invalid argument - python

I'm trying to add multiprocessing to my tkinter app and I've been having issues with the error: TypeError: cannot pickle '_tkinter.tkapp' object. I had a look at the solution proposed in the question here and tried to implement my own version of it. This appears to have solved that particular error, but now I constantly get OSError: [Errno 22] Invalid argument instead:
What I want the code to do is perform some calculation in the background and put the results of this calculation into the Queue (here just integers, but they will be NumPy arrays in the actual code). The GUI application then displays some statistics and results to the user.
from multiprocessing import Process, Queue
from queue import Empty
import tkinter as tk
from tkinter import Tk
# Question version of the GUI process. This is the code that produces the
# traceback quoted below, so it is left exactly as posted.
class FooUI(Process):
def __init__(self, q: Queue):
# The instance itself is passed as the Process target; run() is not overridden.
super().__init__(target=self, args=(q,))
self.queue = q
self.duh = []
# The Tk root is created and mainloop() is entered here, i.e. while FooUI(q)
# is being constructed, before start() is called. The traceback shows start()
# failing with "cannot pickle '_tkinter.tkapp' object".
self.root = Tk()
self._create_interface()
self.root.after(100, self._check_queue)
self.root.mainloop()
def _check_queue(self):
# Non-blocking poll of the queue; an empty queue is ignored.
try:
out = self.queue.get_nowait()
# NOTE(review): a truthiness test skips falsy items such as 0 — confirm intent.
if out:
self.duh.append(out)
print(self.duh)
# Presumably returns before the after() call below, which would stop the polling.
return
except Empty:
pass
# Poll again in 100 ms.
self.root.after(100, self._check_queue)
def _create_interface(self):
# 100x100 window with a single 'Start' button wired to calc().
self.root.geometry("100x100")
b = tk.Button(self.root, text='Start', command=self.calc)
b.grid(row=0, column=0)
def calc(self):
# NOTE(review): do_calc(q) takes a queue argument, but no args are passed here.
p = Process(target=do_calc)
p.start()
def do_calc(q: Queue):
    """Put the squares of 0 through 19 on ``q``, in increasing order."""
    squares = (n ** 2 for n in range(20))
    for value in squares:
        q.put(value)
# Entry-point guard. The traceback below shows the spawn start method
# re-running the main module in the child (spawn.py: _fixup_main_from_path ->
# runpy.run_path), so process creation must stay behind this check.
# NOTE(review): the traceback's main path is '<input>', which suggests the code
# was pasted into an interactive console rather than run from a file — confirm.
if __name__ == '__main__':
    q = Queue()
    f = FooUI(q)
    f.start()
And here is the traceback:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\runpy.py", line 264, in run_path
code, fname = _get_code_from_file(run_name, path_name)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\runpy.py", line 234, in _get_code_from_file
with io.open_code(decoded_path) as f:
OSError: [Errno 22] Invalid argument: 'C:\\python\\block_model_variable_imputer\\<input>'
Traceback (most recent call last):
File "<input>", line 3, in <module>
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\process.py", line 121, in start
self._popen = self._Popen(self)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\context.py", line 327, in _Popen
return Popen(process_obj)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
reduction.dump(process_obj, to_child)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle '_tkinter.tkapp' object
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\spawn.py", line 125, in _main
prepare(preparation_data)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\spawn.py", line 236, in prepare
_fixup_main_from_path(data['init_main_from_path'])
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\multiprocessing\spawn.py", line 287, in _fixup_main_from_path
main_content = runpy.run_path(main_path,
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\runpy.py", line 264, in run_path
code, fname = _get_code_from_file(run_name, path_name)
File "C:\Users\cherp2\AppData\Local\Programs\Python\Python38\lib\runpy.py", line 234, in _get_code_from_file
with io.open_code(decoded_path) as f:
OSError: [Errno 22] Invalid argument: 'C:\\python\\block_model_variable_imputer\\<input>'
I've been trying for a while to get it to work. Any help will be greatly appreciated!

You are subclassing Process the wrong way. Override the run() method instead of passing the target option, and create the Tk() instance inside run() so that it is built in the child process and never has to be pickled.
from multiprocessing import Process, Queue
from queue import Empty
import tkinter as tk
class FooUI(Process):
    """Tk window that lives in its own process and collects results from a queue.

    The instance only stores the queue and a result list when it is
    constructed; every Tk object is created later, inside ``run()``.
    """

    def __init__(self, q: Queue):
        """Store the result queue; do not pass ``target``/``args`` to ``Process``."""
        super().__init__()
        self.queue = q
        self.duh = []  # results received so far

    def run(self):
        """Build the interface and enter the Tk main loop (overrides ``Process.run``)."""
        # Tk() is created here rather than in __init__ so the instance holds no
        # Tk object at the time start() is called on it.
        self.root = tk.Tk()
        self._create_interface()
        self.root.after(100, self._check_queue)
        self.root.mainloop()

    def _check_queue(self):
        """Fetch at most one pending result, then schedule the next poll in 100 ms."""
        try:
            out = self.queue.get_nowait()
        except Empty:
            pass
        else:
            # Store every item that was received. A truthiness test here would
            # silently drop 0 (the first square) and is ambiguous for arrays.
            self.duh.append(out)
            print(self.duh)
        self.root.after(100, self._check_queue)

    def _create_interface(self):
        """Create a 100x100 window with a single 'Start' button bound to ``calc``."""
        self.root.geometry("100x100")
        b = tk.Button(self.root, text='Start', command=self.calc)
        b.grid(row=0, column=0)

    def calc(self):
        """Start a worker process that feeds ``self.queue`` via ``do_calc``."""
        # Start a fresh result list only when no results are still pending.
        if self.queue.empty():
            self.duh.clear()
        p = Process(target=do_calc, args=[self.queue])  # pass self.queue to do_calc()
        p.start()
def do_calc(q: Queue):
    """Feed ``q`` with i**2 for i = 0..19, one item per ``put`` call."""
    n = 0
    while n < 20:
        q.put(n ** 2)
        n += 1
if __name__ == '__main__':
    # Create the shared queue first, then hand it to the GUI process and start it.
    results = Queue()
    ui = FooUI(results)
    ui.start()

Related

Sharing dictionary over multiprocesses (TypeError: cannot pickle 'weakref' object)

I want to create a class Storage where each object has a dictionary orderbooks as a property.
I want to write to orderbooks from the main process by invoking the method write, but I want to defer this action to another process while ensuring that the dictionary orderbooks is still accessible from the main process.
To do so, I create a Manager() that I pass in when the object is created and that is used to notify the processes about changes to the dictionary. My code is the following:
from multiprocessing import Process, Manager
# Question version: each Storage keeps the Manager itself plus a dict proxy
# created from it. Left exactly as posted because it reproduces the error below.
class Storage():
def __init__(self,manager):
self.manager = manager
self.orderbooks = self.manager.dict()
def store_value(self,el):
# el is indexed as [key, value].
self.orderbooks[el[0]] = el[1]
def write(self,el:list):
# The target is a bound method of this instance; the traceback below fails
# while dumping the process object (reduction.dump) inside p.start().
# NOTE(review): presumably the stored Manager is what cannot be pickled — the
# answer below keeps only the dict proxy on the instance. Confirm.
p = Process(target=self.store_value,args=(el,))
p.start()
if __name__ == '__main__':
manager=Manager()
book1 = Storage(manager)
# This is the call reported in the traceback below, which ends in
# "TypeError: cannot pickle 'weakref' object".
book1.write([0,1])
However, when I run this code, I get the following error
Traceback (most recent call last):
File "/Users/main_user/PycharmProjects/handle_queue/main.py", line 21, in <module>
book1.write([0,1])
File "/Users/main_user/PycharmProjects/handle_queue/main.py", line 13, in write
p.start()
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/process.py", line 121, in start
self._popen = self._Popen(self)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/context.py", line 224, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/context.py", line 284, in _Popen
return Popen(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 32, in __init__
super().__init__(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_fork.py", line 19, in __init__
self._launch(process_obj)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/popen_spawn_posix.py", line 47, in _launch
reduction.dump(process_obj, fp)
File "/Users/main_user/opt/anaconda3/envs/handle_queue/lib/python3.10/multiprocessing/reduction.py", line 60, in dump
ForkingPickler(file, protocol).dump(obj)
TypeError: cannot pickle 'weakref' object
What is wrong with my code?
Per Aaron's posted comment:
from multiprocessing import Process, Manager
class Storage:
    """Holds a dictionary-like ``orderbooks`` object and writes to it from a child process."""

    def __init__(self, orderbooks):
        # Only the mapping is kept on the instance; it is supplied by the caller.
        self.orderbooks = orderbooks

    def store_value(self, el):
        """Store ``el[1]`` under the key ``el[0]``."""
        value = el[1]
        key = el[0]
        self.orderbooks[key] = value

    def write(self, el: list):
        """Run ``store_value(el)`` in a separate process and wait for it to finish."""
        worker = Process(target=self.store_value, args=(el,))
        worker.start()
        # Do not return until store_value has completed updating the dictionary.
        worker.join()
if __name__ == '__main__':
    # The managed dict is created here and handed to Storage, so the Storage
    # instance itself never holds the Manager.
    manager = Manager()
    shared_books = manager.dict()
    Storage(shared_books).write([0, 1])
    print(shared_books)
Prints:
{0: 1}

Which Methods Open A File in Python

I'm trying to run code which writes its outputs to a bunch of files (upwards of 250). I'm currently using:
# Append one row; the file is closed again as soon as the with-block exits.
with open(self.file, self.mode, newline='') as csv_handle:
    csv.writer(csv_handle).writerow(row)
This should auto close my files but I'm still getting the following:
52 of 231 | Analysing player: Evolved ANN 5 Noise 05 ...
Traceback (most recent call last):
File "fullAnalysis.py", line 200, in <module>
run_one.start()
File "fullAnalysis.py", line 179, in start
print_output=False)
File "/home/vi/td/axelrod-dojo/src/axelrod_dojo/algorithms/genetic_algorithm.py", line 28, in __init__
self.pool = Pool(processes=self.processes)
File "/home/vi/.conda/envs/project/lib/python3.6/multiprocessing/context.py", line 119, in Pool
context=self.get_context())
File "/home/vi/.conda/envs/project/lib/python3.6/multiprocessing/pool.py", line 174, in __init__
self._repopulate_pool()
File "/home/vi/.conda/envs/project/lib/python3.6/multiprocessing/pool.py", line 239, in _repopulate_pool
w.start()
File "/home/vi/.conda/envs/project/lib/python3.6/multiprocessing/process.py", line 105, in start
self._popen = self._Popen(self)
File "/home/vi/.conda/envs/project/lib/python3.6/multiprocessing/context.py", line 277, in _Popen
return Popen(process_obj)
File "/home/vi/.conda/envs/project/lib/python3.6/multiprocessing/popen_fork.py", line 26, in __init__
self._launch(process_obj)
File "/home/vi/.conda/envs/project/lib/python3.6/multiprocessing/popen_fork.py", line 72, in _launch
parent_r, child_w = os.pipe()
OSError: [Errno 24] Too many open files
My question is, which methods open files in python so I can find this leak and plug it?
Currently the only file-related calls I'm using are a mixture of:
os.makedirs()
os.remove()
csv.writerow() (as shown above, inside the with block)
(This is the second time it has happened on #52 after implementing the with-block code.)
EDIT
Below is the main section of code at the top of the trace for fullAnalysis.py:
# Excerpt from fullAnalysis.py: builds and runs one axl_dojo.Population per opponent.
# `i` and the upper-case constants are defined outside this excerpt.
for opponent in self.opponent_list:
print(i, "of", len(self.opponent_list), "| Analysing player:", str(opponent), "...")
global_processes = 20
# Stochastic players need seeding
if opponent.classifier['stochastic']:
opponent = self._get_seeded_player_class(type(opponent))(self.global_seed)
global_processes = 1
# NOTE(review): per the traceback above, this constructor call reaches
# genetic_algorithm.py __init__, which creates a multiprocessing Pool. Nothing
# in this excerpt closes that pool before the next iteration — a likely source
# of "Too many open files". Confirm against axelrod_dojo.
# The processes= argument is commented out, so global_processes is not passed on.
population = axl_dojo.Population(params_class=axl_dojo.CyclerParams,
params_kwargs=cycler_kwargs,
size=POPULATION_SIZE,
# processes=global_processes,
population=getPreMadePop(POPULATION_SIZE),
objective=cycler_objective,
output_filename=self._get_file_name(opponent),
opponents=[opponent],
print_output=False)
population.run(GENERATION_LENGTH)
print("{:.2f}% Done.\tSaved to:".format((100 * i) / len(self.opponent_list)),
self._get_file_name(opponent))
TRACKER.print_diff()
# self.output_files[str(opponent)] = self._get_file_name(opponent)
i += 1
Below is the __init__ code from axelrod_dojo's genetic_algorithm.py:
# Constructor excerpt as quoted by the asker; the quote may be abridged.
def __init__(self, params_class, params_kwargs, size, objective, output_filename,
bottleneck=None, mutation_probability=.1, opponents=None,
processes=1, weights=None,
sample_count=None, population=None, print_output=True):
self.params_class = params_class
self.bottleneck = bottleneck
self.print_output = print_output
# processes == 0 means "use cpu_count()"; any other value is used as given.
if processes == 0:
self.processes = cpu_count()
else:
self.processes = processes
# A worker pool is created on every construction; the quoted excerpt shows no
# matching close()/join() for it.
self.pool = Pool(processes=self.processes)

Memory issue with multiprocessing in Python

I am trying to use my other cores in my python program. And the following is the basic structure/logic of my code:
import multiprocessing as mp
import pandas as pd
import gc
# Module-level helper that Analysis_Obj.run_Analysis hands to pool.map().
# NOTE(review): run is looked up on the class and called with one argument,
# while it is declared as run(self, param) — confirm this is the intended call.
def multiprocess_RUN(param):
result = Analysis_Obj.run(param)
return result
# Loads one CSV file into a DataFrame and runs per-parameter analyses on it.
class Analysis_Obj():
def __init__(self, filename):
# The whole file is read into memory and kept on the instance.
self.DF = pd.read_csv(filename)
return
def run_Analysis(self, param):
# Multi-core option
# A new Pool is created on every call and is not closed or joined in this method.
pool = mp.Pool(processes=1)
# pool.map iterates over the two-element list [self, param], so the worker
# function is called once with self and once with param.
run_result = pool.map(multiprocess_RUN, [self, param])
# Normal option
# Overwrites the multi-core result above when both options are left enabled.
run_result = self.run(param)
return run_result
def run(self, param):
# Let's say I have written a function to count the frequency of 'param' in the target file
# `count` is not defined in this excerpt.
result = count(self.DF, param)
return result
# Driver: one Analysis_Obj per file, one run_Analysis call per parameter.
if __name__ == "__main__":
files = ['file1.csv', 'file2.csv']
params = [1,2,3,4]
results = []
for i in range(0,len(files)):
analysis = Analysis_Obj(files[i])
for j in range(0,len(params)):
result = analysis.run_Analysis(params[j])
results.append(result)
# Only the local name is removed; the appended value stays in `results`.
del result
# The asker's attempt to release the loaded file before the next iteration.
del analysis
gc.collect()
If I comment out the 'Multi-core option' and run the 'Normal option' everything runs fine. But even if I run the 'Multi-core option' with processes=1 I get a Memory Error when my for loop starts on the 2nd file. I have deliberately set it up so that I create and delete an Analysis object in each for loop, so that the file that has been processed will be cleared from memory. Clearly this hasn't worked. Advice of how to get around this would be very much appreciated.
Cheers
EDIT:
Here is the error message I have in the terminal:
Exception in thread Thread-7:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
self.run()
File "/usr/lib/python2.7/threading.py", line 754, in run
self.__target(*self.__args, **self.__kwargs)
File "/usr/lib/python2.7/multiprocessing/pool.py", line 326, in _handle_workers
pool._maintain_pool()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 230, in _maintain_pool
self._repopulate_pool()
File "/usr/lib/python2.7/multiprocessing/pool.py", line 223, in _repopulate_pool
w.start()
File "/usr/lib/python2.7/multiprocessing/process.py", line 130, in start
self._popen = Popen(self)
File "/usr/lib/python2.7/multiprocessing/forking.py", line 121, in __init__
self.pid = os.fork()
OSError: [Errno 12] Cannot allocate memory

Python multithreading and multiprocessing together

Is it possible to spawn multiple processes from a single thread? Or is it a proper design to implement?
My code sample is -
# Creates one thread per platform, starts them all, then waits for all of them.
# `taskset_threads` is not defined in this excerpt.
def run_all_tasks(self):
for platform in self._platforms:
# Each thread runs _run_task_list(self._get_tasks(), platform).
task_thread = threading.Thread(
target=self._run_task_list, args=(
self._get_tasks(),platform,))
taskset_threads.append(task_thread)
for taskset_thread in taskset_threads:
taskset_thread.start()
for taskset_thread in taskset_threads:
taskset_thread.join()
# Thread target: runs each task of `tasklist` in its own Process, one at a time.
# NOTE(review): this snippet appears abridged — `task` is referenced before the
# `for task in tasklist` loop, the outer `try` has no visible handler, and
# `platform` is not used in the lines shown.
def _run_task_list(self, tasklist, platform):
try:
test_case_name = task.__class__.__name__
try:
test_case_name = task._get_test_case_name()
except:
test_case_name = task.__class__.__name__
pass
max_runtime = task.get_max_runtime()
# A new Manager is created on each call, and its dict proxy is stored on
# self, so every thread running this method assigns the same attribute.
manager = Manager()
self._shared_mem = manager.dict()
for task in tasklist:
task_proc = Process(
target=self.proc_setup,
args=(task, self, self._shared_mem))
task_proc.start()
# Waits at most max_runtime for the child; the excerpt shows no handling of a timeout.
task_proc.join(max_runtime)
This works; however, it sometimes gives the following error:
Traceback (most recent call last):
File "C:\wor\lib\TaskSet.py", line 430, in _run_task_list
if "warning" in self._shared_mem:
File "<string>", line 2, in __contains__
File "C:\Python27\lib\multiprocessing\managers.py", line 755, in _callmethod
self._connect()
File "C:\Python27\lib\multiprocessing\managers.py", line 742, in _connect
conn = self._Client(self._token.address, authkey=self._authkey)
File "C:\Python27\lib\multiprocessing\connection.py", line 167, in Client
c = PipeClient(address)
File "C:\Python27\lib\multiprocessing\connection.py", line 387, in PipeClient
win32.WaitNamedPipe(address, 1000)
WindowsError: [Error 2] The system cannot find the file specified
This can also be seen on linux platform.

FileNotFoundError: [Errno 2] No such file or directory: 'test_user1_user_id'

I got an error FileNotFoundError: [Errno 2] No such file or directory: 'test_user1_user_id'. I wrote in tests.py
from datetime import datetime
from django.test import TestCase
from app.models import Companytransaction
import xlrd
# Create your tests here.
# Question version of the test case; left as posted because it reproduces the
# FileNotFoundError in the traceback below.
class CompanytransactionModelTests(TestCase):
# This __init__ takes a required sheet_path and does not call the base-class
# __init__. The traceback below shows the loader constructing the class via
# map(testCaseClass, testCaseNames), and open_workbook then failing on the
# file name 'test_user1_user_id'.
def __init__(self, sheet_path):
self.book = xlrd.open_workbook(sheet_path)
self.sheet = self.book.sheet_by_index(1)
def setUp(self):
# Loads the second sheet (index 1) of ./data/excel1.xlsx.
self.book = xlrd.open_workbook('./data/excel1.xlsx')
self.sheet = self.book.sheet_by_index(1)
num = 0
# range(2,4) yields two row indexes, so the user3 branch below is never reached.
for row_index in range(2,4):
row = self.sheet.row_values(row_index)
# NOTE(review): the row is saved here and then created again via
# objects.create() below — presumably two records per row; confirm.
user = Companytransaction(user_id=row[1], name=row[2], age=row[3])
user.save()
if num == 0:
self.user1 = Companytransaction.objects.create(user_id=row[1], name=row[2], age=row[3])
num += 1
elif num == 1:
self.user2 = Companytransaction.objects.create(user_id=row[1], name=row[2], age=row[3])
num += 1
else:
self.user3 = Companytransaction.objects.create(user_id=row[1], name=row[2], age=row[3])
# The test names below mention company/corporation ids, while the assertions
# check user_id, name and age.
def test_user1_company_id(self):
self.assertEqual(self.user1.user_id, '100')
def test_user1_corporation_id(self):
self.assertEqual(self.user1.name, 'Tom')
def test_user1_company_name(self):
self.assertEqual(self.user1.age, '29')
I ran python manage.py test and ./manage.py test app.tests, but both of them show the same error. I definitely defined the user_id column in models.py, so I really cannot understand why this error happens. How can I fix this?
What should I write instead?
Here's the traceback.
Traceback (most recent call last):
File "./manage.py", line 22, in <module>
execute_from_command_line(sys.argv)
File "/Users/xxx/myenv/lib/python3.5/site-packages/django/core/management/__init__.py", line 363, in execute_from_command_line
utility.execute()
File "/Users/xxx/myenv/lib/python3.5/site-packages/django/core/management/__init__.py", line 355, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/Users/xxx/myenv/lib/python3.5/site-packages/django/core/management/commands/test.py", line 29, in run_from_argv
super(Command, self).run_from_argv(argv)
File "/Users/xxx/myenv/lib/python3.5/site-packages/django/core/management/base.py", line 283, in run_from_argv
self.execute(*args, **cmd_options)
File "/Users/xxx/myenv/lib/python3.5/site-packages/django/core/management/base.py", line 330, in execute
output = self.handle(*args, **options)
File "/Users/xxx/myenv/lib/python3.5/site-packages/django/core/management/commands/test.py", line 62, in handle
failures = test_runner.run_tests(test_labels)
File "/Users/xxx/myenv/lib/python3.5/site-packages/django/test/runner.py", line 600, in run_tests
suite = self.build_suite(test_labels, extra_tests)
File "/Users/xxx/myenv/lib/python3.5/site-packages/django/test/runner.py", line 484, in build_suite
tests = self.test_loader.loadTestsFromName(label)
File "/Users/xxx/.pyenv/versions/3.5.0/lib/python3.5/unittest/loader.py", line 190, in loadTestsFromName
return self.loadTestsFromModule(obj)
File "/Users/xxx/.pyenv/versions/3.5.0/lib/python3.5/unittest/loader.py", line 123, in loadTestsFromModule
tests.append(self.loadTestsFromTestCase(obj))
File "/Users/xxx/.pyenv/versions/3.5.0/lib/python3.5/unittest/loader.py", line 92, in loadTestsFromTestCase
loaded_suite = self.suiteClass(map(testCaseClass, testCaseNames))
File "/Users/xxx/.pyenv/versions/3.5.0/lib/python3.5/unittest/suite.py", line 24, in __init__
self.addTests(tests)
File "/Users/xxx/.pyenv/versions/3.5.0/lib/python3.5/unittest/suite.py", line 57, in addTests
for test in tests:
File "/Users/xxx/app/app/tests.py", line 12, in __init__
self.book = xlrd.open_workbook(sheet_path)
File "/Users/xxx/myenv/lib/python3.5/site-packages/xlrd/__init__.py", line 116, in open_workbook
with open(filename, "rb") as f:
FileNotFoundError: [Errno 2] No such file or directory: 'test_user1_user_id'
It looks like the issue is with your __init__ method:
# Quoted from the question: the constructor requires sheet_path and opens the
# workbook with it.
def __init__(self, sheet_path):
self.book = xlrd.open_workbook(sheet_path)
self.sheet = self.book.sheet_by_index(1)
You're getting this error because you aren't providing sheet_path when CompanytransactionModelTests is initialized. We can see from the unittest.TestCase source that methodName is the first & only argument when initializing a TestCase, so the test loader passes the test method's name in the position where your __init__ expects sheet_path. I'd bet a penny that you've got a method called test_user1_user_id on a class with similar __init__ code.
Setting up that test data in setUp is the right way to do what you're doing. It looks like things should work properly if you get rid of that __init__ code.

Categories