I'm developing a Windows application written in Python 3.8 that uses the nnunet library (https://pypi.org/project/nnunet/), which in turn uses multiprocessing. I have tested the script and it works correctly.
Now I'm trying to package everything with PyInstaller v5.7.0. The .exe is created successfully, but when I run it I get the following error (note that the output of the parent process and of the spawned child process is interleaved):
Traceback (most recent call last):
File "main.py", line 344, in <module>
File "nnunet\inference\predict.py", line 694, in predict_from_folder
File "nnunet\inference\predict.py", line 496, in predict_cases_fastest
File "nnunet\inference\predict.py", line 123, in preprocess_multithreaded
File "multiprocess\process.py", line 121, in start
File "multiprocess\context.py", line 224, in _Popen
File "multiprocess\context.py", line 327, in _Popen
File "multiprocess\popen_spawn_win32.py", line 93, in __init__
File "multiprocess\reduction.py", line 70, in dump
File "dill\_dill.py", line 394, in dump
File "pickle.py", line 487, in dump
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 717, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1186, in save_module_dict
File "pickle.py", line 971, in save_dict
Traceback (most recent call last):
File "main.py", line 341, in <module>
File "pickle.py", line 997, in _batch_setitems
File "D:\MyProject\venv\Lib\site-packages\PyInstaller\hooks\rthooks\pyi_rth_multiprocessing.py", line 49, in _freeze_support
File "dill\_dill.py", line 388, in save
spawn.spawn_main(**kwds)
File "pickle.py", line 560, in save
File "pickle.py", line 901, in save_tuple
File "dill\_dill.py", line 388, in save
File "multiprocessing\spawn.py", line 116, in spawn_main
File "pickle.py", line 560, in save
File "multiprocessing\spawn.py", line 126, in _main
File "dill\_dill.py", line 1427, in save_instancemethod0
EOFError: Ran out of input
[588] Failed to ex File "pickle.py", line 692, in save_reduce
ecute script 'main' d File "dill\_dill.py", line 388, in save
ue to unhandled File "pickle.py", line 560, in save
exception!
File "pickle.py", line 886, in save_tuple
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 717, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1186, in save_module_dict
File "pickle.py", line 971, in save_dict
File "pickle.py", line 997, in _batch_setitems
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 687, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1698, in save_type
File "dill\_dill.py", line 1070, in _save_with_postproc
File "pickle.py", line 692, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "pickle.py", line 901, in save_tuple
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "pickle.py", line 886, in save_tuple
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1698, in save_type
File "dill\_dill.py", line 1084, in _save_with_postproc
File "pickle.py", line 997, in _batch_setitems
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 717, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1186, in save_module_dict
File "pickle.py", line 971, in save_dict
File "pickle.py", line 997, in _batch_setitems
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 717, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1186, in save_module_dict
File "pickle.py", line 971, in save_dict
File "pickle.py", line 997, in _batch_setitems
File "dill\_dill.py", line 388, in save
File "pickle.py", line 578, in save
File "PyInstaller\loader\pyimod01_archive.py", line 76, in __getattr__
AssertionError
[4392] Failed to execute script 'main' due to unhandled exception!
Below is the code of my python script:
#==============================
# main.py
#==============================
from multiprocessing import freeze_support
from nnunet.inference.predict import predict_from_folder
if __name__ == "__main__":
freeze_support()
...
predict_from_folder(...)
...
Below is the code of the nnunet library that triggers the error:
#==============================
# nnunet\inference\predict.py
#==============================
def preprocess_multithreaded(trainer, list_of_lists, output_files, num_processes=2, segs_from_prev_stage=None):
    """Run preprocessing in worker processes and yield what they put on a queue.

    One ``Process`` per worker is started with ``preprocess_save_to_queue`` as
    its target. Worker ``i`` receives every ``num_processes``-th element
    (``[i::num_processes]``) of the input lists, together with the shared
    queue (capacity 1). This generator then forwards each item taken from the
    queue until it has seen as many ``"end"`` sentinels as there are workers.

    :param trainer: must be an ``nnUNetTrainer`` (asserted); its
        ``preprocess_patient`` bound method, ``num_classes`` and
        ``plans['transpose_forward']`` are passed on to the workers.
    :param list_of_lists: per-case inputs; its length caps the worker count.
    :param output_files: sliced in step with ``list_of_lists``.
    :param num_processes: upper bound on the number of worker processes.
    :param segs_from_prev_stage: sliced in step with ``list_of_lists``;
        ``None`` is replaced by a list of ``None`` of matching length.
    :return: generator over the items read from the queue (sentinels excluded).
    """
    if segs_from_prev_stage is None:
        segs_from_prev_stage = [None] * len(list_of_lists)

    # Never start more workers than there are cases to process.
    num_processes = min(len(list_of_lists), num_processes)
    classes = list(range(1, trainer.num_classes))
    assert isinstance(trainer, nnUNetTrainer)

    q = Queue(1)
    processes = []
    for worker_idx in range(num_processes):
        # Same elements as the [worker_idx::num_processes] slice.
        shard = slice(worker_idx, None, num_processes)
        worker_args = (
            trainer.preprocess_patient,
            q,
            list_of_lists[shard],
            output_files[shard],
            segs_from_prev_stage[shard],
            classes,
            trainer.plans['transpose_forward'],
        )
        worker = Process(target=preprocess_save_to_queue, args=worker_args)
        worker.start()  ## <------------ The error is generated here!!!!!!!!!!!!!
        processes.append(worker)

    try:
        finished_workers = 0
        while finished_workers != num_processes:
            item = q.get()
            if item == "end":
                # One "end" sentinel is counted per worker.
                finished_workers += 1
            else:
                yield item
    finally:
        # Runs on normal exhaustion and when the generator is closed early.
        for worker in processes:
            if worker.is_alive():
                worker.terminate()
            worker.join()
        q.close()
def predict_cases_fastest(...):
...
pool = Pool(num_threads_nifti_save)
...
preprocessing = preprocess_multithreaded(
trainer,
list_of_lists,
cleaned_output_files,
num_threads_preprocessing,
segs_from_prev_stage
)
...
pool.starmap_async(...)
...
pool.close()
pool.join()
def predict_from_folder(...):
...
return predict_cases_fastest(...)
if __name__ == "__main__":
...
Edit 03-02-2023
I have created a public project with which it is possible to reproduce the reported problem: https://gitlab.com/carlopoletto/nnunet_pyinstaller_problem
In the ./scripts folder there are some scripts to install everything and run the tests:
./scripts/install: dependency installation
./scripts/dist: creating the executable with pyinstaller
./scripts/run_py: runs the Python script (NB: this script automatically deletes the ./temp folder and recreates it by copying the contents of ./data)
./scripts/run_exe: runs the executable created with ./scripts/dist (NB: this script automatically deletes the ./temp folder and recreates it by copying the contents of ./data)
The problem appears to be internal to the nnunet library. I don't know if this problem can be solved by properly configuring pyinstaller.
I have a spreadsheet (~50 MB) with multiple sheets, and I'm trying to read it using pandas.
import pandas as pd
df = pd.read_excel('compiled_output.xlsx', sheet_name='Sheet1')
I'm not sure why it's throwing lxml.etree.XMLSyntaxError; I've done this many times before. I also tried passing engine='openpyxl' and downgrading to pandas==1.2.4, but I get the same error:
df = pd.read_excel('compiled_output.xlsx', sheet_name='Sheet1')
File "/usr/local/lib/python3.9/site-packages/pandas/util/_decorators.py", line 299, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 336, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "/usr/local/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 1131, in __init__
self._reader = self._engines[engine](self._io, storage_options=storage_options)
File "/usr/local/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py", line 475, in __init__
super().__init__(filepath_or_buffer, storage_options=storage_options)
File "/usr/local/lib/python3.9/site-packages/pandas/io/excel/_base.py", line 391, in __init__
self.book = self.load_workbook(self.handles.handle)
File "/usr/local/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py", line 486, in load_workbook
return load_workbook(
File "/usr/local/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 317, in load_workbook
reader.read()
File "/usr/local/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 282, in read
self.read_worksheets()
File "/usr/local/lib/python3.9/site-packages/openpyxl/reader/excel.py", line 216, in read_worksheets
rels = get_dependents(self.archive, rels_path)
File "/usr/local/lib/python3.9/site-packages/openpyxl/packaging/relationship.py", line 131, in get_dependents
node = fromstring(src)
File "src/lxml/etree.pyx", line 3237, in lxml.etree.fromstring
File "src/lxml/parser.pxi", line 1896, in lxml.etree._parseMemoryDocument
File "src/lxml/parser.pxi", line 1784, in lxml.etree._parseDoc
File "src/lxml/parser.pxi", line 1141, in lxml.etree._BaseParser._parseDoc
File "src/lxml/parser.pxi", line 615, in lxml.etree._ParserContext._handleParseResultDoc
File "src/lxml/parser.pxi", line 725, in lxml.etree._handleParseResult
File "src/lxml/parser.pxi", line 654, in lxml.etree._raiseParseError
File "<string>", line 2
lxml.etree.XMLSyntaxError: internal error: Huge input lookup, line 2, column 12753697
Could someone help me understand why this code throws an error, and suggest how I could save the file to my local machine?
myLast = result[:1]
for x in myLast:
urldailyLocal= os.path.basename(x)
s=requests.get(x, verify=False).content
c=pd.read_csv(s)
c.to_csv('path/to/my/file/'+urldailyLocal, index=False)
Error after running the above code:
Traceback (most recent call last):
File "<stdin>", line 4, in <module>
File "/usr/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 676, in parser_f
return _read(filepath_or_buffer, kwds)
File "/usr/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 448, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "/usr/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 880, in __init__
self._make_engine(self.engine)
File "/usr/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 1114, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/usr/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 1891, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas/_libs/parsers.pyx", line 374, in pandas._libs.parsers.TextReader.__cinit__
File "pandas/_libs/parsers.pyx", line 694, in pandas._libs.parsers.TextReader._setup_parser_source
OSError: Expected file path name or file-like object, got <class 'bytes'> type
The myLast list stores a URL in the following format:
'https://test.com/something/example_2020-09-27-10.51PST_ALL.csv'
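The following should work for you. pd.read_csv expects a file path or a file-like object, not raw bytes, so decode the downloaded content and wrap it in io.StringIO (this requires `import io`):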
s = requests.get(x, verify=False).content
df = pd.read_csv(io.StringIO(s.decode('utf-8')))
I'm using PyCharm for a Python project. My project structure is like this:
+ project
+ src
- Data.csv
- main.py
This is main.py:
# The package is named ``pandas``; a module called ``panda`` does not exist.
import pandas as pd

# Relative path: it is resolved against the current working directory of the
# interpreter/console that executes this line, not against main.py's folder.
dataset = pd.read_csv("Data.csv")
When I use 'Execute Line in Console' and run the second line, I get this error:
Traceback (most recent call last):
File "C:\Users\livw2\AppData\Local\Programs\Python\Python37\lib\site-packages\IPython\core\interactiveshell.py", line 3296, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-14-bd7168d85704>", line 1, in <module>
dataset = pd.read_csv('Data.csv')
File "C:\Users\livw2\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 702, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\livw2\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 429, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Users\livw2\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 895, in __init__
self._make_engine(self.engine)
File "C:\Users\livw2\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 1122, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Users\livw2\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 1853, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 387, in pandas._libs.parsers.TextReader.__cinit__
File "pandas\_libs\parsers.pyx", line 705, in pandas._libs.parsers.TextReader._setup_parser_source
FileNotFoundError: [Errno 2] File b'Data.csv' does not exist: b'Data.csv'
When I run the whole script using 'Run', it works fine, so I think that executing in the console changes the working directory. But I haven't figured out how to fix it.
To fix it, close your current project and open, as the project, the folder that directly contains your file.
I am using PyCharm, and when I run code that opens a CSV file with pandas, I get an error saying that the file does not exist.
I saved the CSV file in my project directory and read it using pandas.
import pandas as pd
df = pd.read_csv("E:\\students")
print(df)
The error when I run the code:
Traceback (most recent call last): File "E:/untitled232/file1.py", line 2, in <module>
df = pd.read_csv("E:\\students") File "E:\untitled232\venv\lib\site-packages\pandas\io\parsers.py", line 678, in parser_f
return _read(filepath_or_buffer, kwds) File "E:\untitled232\venv\lib\site-packages\pandas\io\parsers.py", line 440, in _read
parser = TextFileReader(filepath_or_buffer, **kwds) File "E:\untitled232\venv\lib\site-packages\pandas\io\parsers.py", line 787, in __init__
self._make_engine(self.engine) File "E:\untitled232\venv\lib\site-packages\pandas\io\parsers.py", line 1014, in _make_engine
self._engine = CParserWrapper(self.f, **self.options) File "E:\untitled232\venv\lib\site-packages\pandas\io\parsers.py", line 1708, in __init__
self._reader = parsers.TextReader(src, **kwds) File "pandas\_libs\parsers.pyx", line 384, in pandas._libs.parsers.TextReader.__cinit__ File "pandas\_libs\parsers.pyx", line 695, in pandas._libs.parsers.TextReader._setup_parser_source FileNotFoundError: File b'E:\\students' does not exist
It turned out that I had to include the .csv extension in the file name (i.e. pd.read_csv("E:\\students.csv")).