We're running a fairly simple job which reads JSON, does some processing and outputs JSON.
For some reason, this always fails with a very weird "pickling" error:
PicklingError: Can't pickle <type 'generator'>: attribute lookup __builtin__.generator failed [while running 'map to user_activity']
It fails almost immediately while processing the first line of data. The preceding stage emits a tuple of (String, []), i.e. a string paired with a list. The 'map to user_activity' stage fails as soon as it tries to iterate over that list.
There are no lambdas, which seem to be a common source of these pickling errors. We've narrowed it down to iterating over the list in the input tuple. If we don't iterate, the job "works". As soon as we do:
for entry in input_tuple:
    pass
The job fails immediately.
**** Update ****
It turns out, iterating over the input tuple isn't the key. ANY for loop in the Map function will cause a crash, even something like this:
q = [1, 2, 3, 4, 5, 6]
for a in q:
    pass
Here's the full stack trace for the error:
An exception was raised when trying to execute the workitem 4985068250752295797 : Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/batchworker.py", line 582, in do_work
work_executor.execute()
File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/executor.py", line 166, in execute
op.start()
File "dataflow_worker/native_operations.py", line 38, in dataflow_worker.native_operations.NativeReadOperation.start (dataflow_worker/native_operations.c:3175)
def start(self):
File "dataflow_worker/native_operations.py", line 39, in dataflow_worker.native_operations.NativeReadOperation.start (dataflow_worker/native_operations.c:3079)
with self.scoped_start_state:
File "dataflow_worker/native_operations.py", line 44, in dataflow_worker.native_operations.NativeReadOperation.start (dataflow_worker/native_operations.c:2994)
with self.spec.source.reader() as reader:
File "dataflow_worker/native_operations.py", line 54, in dataflow_worker.native_operations.NativeReadOperation.start (dataflow_worker/native_operations.c:2938)
self.output(windowed_value)
File "apache_beam/runners/worker/operations.py", line 154, in apache_beam.runners.worker.operations.Operation.output (apache_beam/runners/worker/operations.c:5783)
cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 86, in apache_beam.runners.worker.operations.ConsumerSet.receive (apache_beam/runners/worker/operations.c:3622)
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 339, in apache_beam.runners.worker.operations.DoOperation.process (apache_beam/runners/worker/operations.c:11089)
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 340, in apache_beam.runners.worker.operations.DoOperation.process (apache_beam/runners/worker/operations.c:11043)
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 382, in apache_beam.runners.common.DoFnRunner.receive (apache_beam/runners/common.c:10156)
self.process(windowed_value)
File "apache_beam/runners/common.py", line 390, in apache_beam.runners.common.DoFnRunner.process (apache_beam/runners/common.c:10458)
self._reraise_augmented(exn)
File "apache_beam/runners/common.py", line 415, in apache_beam.runners.common.DoFnRunner._reraise_augmented (apache_beam/runners/common.c:11363)
raise
File "apache_beam/runners/common.py", line 388, in apache_beam.runners.common.DoFnRunner.process (apache_beam/runners/common.c:10371)
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 189, in apache_beam.runners.common.SimpleInvoker.invoke_process (apache_beam/runners/common.c:6270)
self.output_processor.process_outputs(
File "apache_beam/runners/common.py", line 480, in apache_beam.runners.common._OutputProcessor.process_outputs (apache_beam/runners/common.c:12500)
self.main_receivers.receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 86, in apache_beam.runners.worker.operations.ConsumerSet.receive (apache_beam/runners/worker/operations.c:3622)
cython.cast(Operation, consumer).process(windowed_value)
File "apache_beam/runners/worker/operations.py", line 339, in apache_beam.runners.worker.operations.DoOperation.process (apache_beam/runners/worker/operations.c:11089)
with self.scoped_process_state:
File "apache_beam/runners/worker/operations.py", line 340, in apache_beam.runners.worker.operations.DoOperation.process (apache_beam/runners/worker/operations.c:11043)
self.dofn_receiver.receive(o)
File "apache_beam/runners/common.py", line 382, in apache_beam.runners.common.DoFnRunner.receive (apache_beam/runners/common.c:10156)
self.process(windowed_value)
File "apache_beam/runners/common.py", line 390, in apache_beam.runners.common.DoFnRunner.process (apache_beam/runners/common.c:10458)
self._reraise_augmented(exn)
File "apache_beam/runners/common.py", line 431, in apache_beam.runners.common.DoFnRunner._reraise_augmented (apache_beam/runners/common.c:11673)
raise new_exn, None, original_traceback
File "apache_beam/runners/common.py", line 388, in apache_beam.runners.common.DoFnRunner.process (apache_beam/runners/common.c:10371)
self.do_fn_invoker.invoke_process(windowed_value)
File "apache_beam/runners/common.py", line 189, in apache_beam.runners.common.SimpleInvoker.invoke_process (apache_beam/runners/common.c:6270)
self.output_processor.process_outputs(
File "apache_beam/runners/common.py", line 480, in apache_beam.runners.common._OutputProcessor.process_outputs (apache_beam/runners/common.c:12500)
self.main_receivers.receive(windowed_value)
File "apache_beam/runners/worker/operations.py", line 84, in apache_beam.runners.worker.operations.ConsumerSet.receive (apache_beam/runners/worker/operations.c:3588)
self.update_counters_start(windowed_value)
File "apache_beam/runners/worker/operations.py", line 90, in apache_beam.runners.worker.operations.ConsumerSet.update_counters_start (apache_beam/runners/worker/operations.c:3808)
self.opcounter.update_from(windowed_value)
File "apache_beam/runners/worker/opcounters.py", line 62, in apache_beam.runners.worker.opcounters.OperationCounters.update_from (apache_beam/runners/worker/opcounters.c:2396)
self.do_sample(windowed_value)
File "apache_beam/runners/worker/opcounters.py", line 80, in apache_beam.runners.worker.opcounters.OperationCounters.do_sample (apache_beam/runners/worker/opcounters.c:3017)
self.coder_impl.get_estimated_size_and_observables(windowed_value))
File "apache_beam/coders/coder_impl.py", line 730, in apache_beam.coders.coder_impl.WindowedValueCoderImpl.get_estimated_size_and_observables (apache_beam/coders/coder_impl.c:22968)
def get_estimated_size_and_observables(self, value, nested=False):
File "apache_beam/coders/coder_impl.py", line 739, in apache_beam.coders.coder_impl.WindowedValueCoderImpl.get_estimated_size_and_observables (apache_beam/coders/coder_impl.c:22687)
self._value_coder.get_estimated_size_and_observables(
File "apache_beam/coders/coder_impl.py", line 260, in apache_beam.coders.coder_impl.FastPrimitivesCoderImpl.get_estimated_size_and_observables (apache_beam/coders/coder_impl.c:9578)
self.encode_to_stream(value, out, nested)
File "apache_beam/coders/coder_impl.py", line 298, in apache_beam.coders.coder_impl.FastPrimitivesCoderImpl.encode_to_stream (apache_beam/coders/coder_impl.c:10416)
self.fallback_coder_impl.encode_to_stream(value, stream, nested)
File "apache_beam/coders/coder_impl.py", line 154, in apache_beam.coders.coder_impl.CallbackCoderImpl.encode_to_stream (apache_beam/coders/coder_impl.c:5883)
return stream.write(self._encoder(value), nested)
File "/usr/local/lib/python2.7/dist-packages/apache_beam/coders/coders.py", line 437, in <lambda>
lambda x: dumps(x, HIGHEST_PROTOCOL), pickle.loads)
PicklingError: Can't pickle <type 'generator'>: attribute lookup __builtin__.generator failed [while running 'map to user_activity']
Perhaps this is due to using beam.Map rather than beam.FlatMap for a function that is meant to return (yield) multiple elements. A function containing yield is a generator function, so beam.Map emits the returned generator object as a single element, which then fails to pickle.
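A minimal sketch of that fix, assuming the DoFn yields its outputs element by element (the function body and sample data below are illustrative, not the original code):
import apache_beam as beam

def map_to_user_activity(input_tuple):
    key, entries = input_tuple
    for entry in entries:
        # yield makes this a generator function; beam.Map would emit the
        # generator object itself (unpicklable), while beam.FlatMap iterates
        # it and emits each yielded element.
        yield (key, entry)

with beam.Pipeline() as p:
    _ = (p
         | beam.Create([('user-1', [1, 2, 3])])
         | 'map to user_activity' >> beam.FlatMap(map_to_user_activity)
         | beam.Map(print))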
Related
I'm building an Apache Beam pipeline using GCP Dataflow to process incoming events that need to be written to separate BigQuery tables depending on the content of each event. The decision of which table the data needs to be written to happens in one of the stages of the pipeline. My problem is how to dynamically set the name of the table the data needs to go into. Also, in some cases, data needs to be written to two tables after applying a transform.
I have gone through the solutions posted on these links, but it seems that they could be for old versions of google-cloud/apache-beam and are not working for me:
Dynamically set bigquery table id in dataflow pipeline
Writing different values to different BigQuery tables in Apache Beam
Attaching a sample pipeline using DirectRunner where I tried to follow the 2nd link mentioned above:
#Standard Python Imports
import argparse
import logging
import json
#3rd Party Imports
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions, StandardOptions
def transform_entry(line):
return json.loads(line)
def getTableName(entry):
if (entry["tablename"] == "table1"):
return "table1"
else:
return "table2"
def getRow(entry):
return entry["dataRow"]
def run(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument('--temp_location',
default='<<TEMPORARY LOCATION>>')
known_args, pipeline_args = parser.parse_known_args(argv)
pipeline_options = PipelineOptions(pipeline_args)
pipeline_options.view_as(StandardOptions).streaming = True
with beam.Pipeline(options=pipeline_options) as p:
writeData = (p
| 'ReadInput' >> beam.io.ReadFromText('./sample_input.json')
| 'Parse' >> beam.Map(transform_entry))
eventRow = (writeData
| 'Get Data Row' >> beam.Map(getRow)
| 'Write Event Row' >> beam.io.gcp.bigquery.WriteToBigQuery(
project='<<GCP PROJECT>>',
dataset='<<DATASET NAME>>',
table=getTableName,
write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND
))
print(eventRow)
if __name__ == '__main__':
logging.getLogger().setLevel(logging.ERROR)
run()
Could someone please help me out with this?
Attaching the traceback here:
/home/animesh/.local/lib/python3.10/site-packages/apache_beam/io/gcp/bigquery.py:1992: BeamDeprecationWarning: options is deprecated since First stable release. References to <pipeline>.options will not be supported
is_streaming_pipeline = p.options.view_as(StandardOptions).streaming
<apache_beam.io.gcp.bigquery.WriteResult object at 0x7f2421660100>
Traceback (most recent call last):
File "apache_beam/runners/common.py", line 1417, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 623, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 1571, in apache_beam.runners.common._OutputHandler.handle_process_outputs
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/io/gcp/bigquery_tools.py", line 1521, in process
yield (self.destination(element, *side_inputs), element)
File "/home/animesh/Documents/cliqmetrics/logger/dataflow-pipeline/stackques/stackpipe.py", line 85, in getTableName
KeyError: 'tablename'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/animesh/Documents/cliqmetrics/logger/dataflow-pipeline/stackques/stackpipe.py", line 56, in <module>
run()
File "/home/animesh/Documents/cliqmetrics/logger/dataflow-pipeline/stackques/stackpipe.py", line 37, in run
with beam.Pipeline(options=pipeline_options) as p:
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/pipeline.py", line 600, in __exit__
self.result = self.run()
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/pipeline.py", line 553, in run
self._options).run(False)
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/pipeline.py", line 577, in run
return self.runner.run_pipeline(self, self._options)
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/direct/direct_runner.py", line 131, in run_pipeline
return runner.run_pipeline(pipeline, options)
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 201, in run_pipeline
self._latest_run_result = self.run_via_runner_api(
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 222, in run_via_runner_api
return self.run_stages(stage_context, stages)
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 453, in run_stages
bundle_results = self._execute_bundle(
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 781, in _execute_bundle
self._run_bundle(
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 1010, in _run_bundle
result, splits = bundle_manager.process_bundle(
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/portability/fn_api_runner/fn_runner.py", line 1346, in process_bundle
result_future = self._worker_handler.control_conn.push(process_bundle_req)
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/portability/fn_api_runner/worker_handlers.py", line 379, in push
response = self.worker.do_instruction(request)
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/worker/sdk_worker.py", line 596, in do_instruction
return getattr(self, request_type)(
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/worker/sdk_worker.py", line 634, in process_bundle
bundle_processor.process_bundle(instruction_id))
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/worker/bundle_processor.py", line 1003, in process_bundle
input_op_by_transform_id[element.transform_id].process_encoded(
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/runners/worker/bundle_processor.py", line 227, in process_encoded
self.output(decoded_value)
File "apache_beam/runners/worker/operations.py", line 526, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 528, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 237, in apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 240, in apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 1021, in apache_beam.runners.worker.operations.SdfProcessSizedElements.process
File "apache_beam/runners/worker/operations.py", line 1030, in apache_beam.runners.worker.operations.SdfProcessSizedElements.process
File "apache_beam/runners/common.py", line 1432, in apache_beam.runners.common.DoFnRunner.process_with_sized_restriction
File "apache_beam/runners/common.py", line 817, in apache_beam.runners.common.PerWindowInvoker.invoke_process
File "apache_beam/runners/common.py", line 981, in apache_beam.runners.common.PerWindowInvoker._invoke_process_per_window
File "apache_beam/runners/common.py", line 1581, in apache_beam.runners.common._OutputHandler.handle_process_outputs
File "apache_beam/runners/common.py", line 1694, in apache_beam.runners.common._OutputHandler._write_value_to_tag
File "apache_beam/runners/worker/operations.py", line 240, in apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 907, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 908, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 1419, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 1491, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 1417, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 623, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 1581, in apache_beam.runners.common._OutputHandler.handle_process_outputs
File "apache_beam/runners/common.py", line 1694, in apache_beam.runners.common._OutputHandler._write_value_to_tag
File "apache_beam/runners/worker/operations.py", line 240, in apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 907, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 908, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 1419, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 1491, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 1417, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 623, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 1581, in apache_beam.runners.common._OutputHandler.handle_process_outputs
File "apache_beam/runners/common.py", line 1694, in apache_beam.runners.common._OutputHandler._write_value_to_tag
File "apache_beam/runners/worker/operations.py", line 240, in apache_beam.runners.worker.operations.SingletonElementConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 907, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 908, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 1419, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 1507, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 1417, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 623, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 1571, in apache_beam.runners.common._OutputHandler.handle_process_outputs
File "/home/animesh/.local/lib/python3.10/site-packages/apache_beam/io/gcp/bigquery_tools.py", line 1521, in process
yield (self.destination(element, *side_inputs), element)
File "/home/animesh/Documents/cliqmetrics/logger/dataflow-pipeline/stackques/stackpipe.py", line 85, in getTableName
KeyError: "tablename [while running 'Write Event Row/_StreamToBigQuery/AppendDestination']"
Your function and your way of applying a dynamic table name based on the current element of the PCollection are correct, but there is a problem with the current element in your PCollection.
You have a KeyError on the Dict inside your PCollection: the key tablename does not seem to be present.
You can substitute a mock for ReadFromText to make sure this key is present and that your input PCollection of Dicts is created as expected: for example, use beam.Create([{'field_name': 'field_value'}]).
That way you can test the write-to-BQ part with the dynamic table name more easily.
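A minimal sketch of that suggestion, assuming the mocked elements carry the tablename and dataRow keys the original pipeline expects (the sample values are made up):
import apache_beam as beam

def getTableName(entry):
    # same routing logic as the question's pipeline
    if entry["tablename"] == "table1":
        return "table1"
    else:
        return "table2"

with beam.Pipeline() as p:
    _ = (p
         | 'Mock Input' >> beam.Create([
             {'tablename': 'table1', 'dataRow': {'field_name': 'field_value'}},
             {'tablename': 'table2', 'dataRow': {'field_name': 'other_value'}},
         ])
         | 'Check Routing' >> beam.Map(lambda entry: (getTableName(entry), entry['dataRow']))
         | 'Print' >> beam.Map(print))
Once each element prints the expected (table, row) pair, the same mocked input can be pointed at the WriteToBigQuery step.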
I'm developing an application for Windows operating systems, written in Python 3.8, which makes use of the nnunet library (https://pypi.org/project/nnunet/), which in turn uses multiprocessing. I have tested the script and it works correctly.
Now I'm trying to package everything with PyInstaller v5.7.0. The .exe is created successfully, but when I run it I get the following error:
Traceback (most recent call last):
File "main.py", line 344, in <module>
File "nnunet\inference\predict.py", line 694, in predict_from_folder
File "nnunet\inference\predict.py", line 496, in predict_cases_fastest
File "nnunet\inference\predict.py", line 123, in preprocess_multithreaded
File "multiprocess\process.py", line 121, in start
File "multiprocess\context.py", line 224, in _Popen
File "multiprocess\context.py", line 327, in _Popen
File "multiprocess\popen_spawn_win32.py", line 93, in __init__
File "multiprocess\reduction.py", line 70, in dump
File "dill\_dill.py", line 394, in dump
File "pickle.py", line 487, in dump
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 717, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1186, in save_module_dict
File "pickle.py", line 971, in save_dict
Traceback (most recent call last):
File "main.py", line 341, in <module>
File "pickle.py", line 997, in _batch_setitems
File "D:\MyProject\venv\Lib\site-packages\PyInstaller\hooks\rthooks\pyi_rth_multiprocessing.py", line 49, in _freeze_support
File "dill\_dill.py", line 388, in save
spawn.spawn_main(**kwds)
File "pickle.py", line 560, in save
File "pickle.py", line 901, in save_tuple
File "dill\_dill.py", line 388, in save
File "multiprocessing\spawn.py", line 116, in spawn_main
File "pickle.py", line 560, in save
File "multiprocessing\spawn.py", line 126, in _main
File "dill\_dill.py", line 1427, in save_instancemethod0
EOFError: Ran out of input
[588] Failed to execute script 'main' due to unhandled exception!
File "pickle.py", line 692, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "pickle.py", line 886, in save_tuple
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 717, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1186, in save_module_dict
File "pickle.py", line 971, in save_dict
File "pickle.py", line 997, in _batch_setitems
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 687, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1698, in save_type
File "dill\_dill.py", line 1070, in _save_with_postproc
File "pickle.py", line 692, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "pickle.py", line 901, in save_tuple
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "pickle.py", line 886, in save_tuple
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1698, in save_type
File "dill\_dill.py", line 1084, in _save_with_postproc
File "pickle.py", line 997, in _batch_setitems
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 717, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1186, in save_module_dict
File "pickle.py", line 971, in save_dict
File "pickle.py", line 997, in _batch_setitems
File "dill\_dill.py", line 388, in save
File "pickle.py", line 603, in save
File "pickle.py", line 717, in save_reduce
File "dill\_dill.py", line 388, in save
File "pickle.py", line 560, in save
File "dill\_dill.py", line 1186, in save_module_dict
File "pickle.py", line 971, in save_dict
File "pickle.py", line 997, in _batch_setitems
File "dill\_dill.py", line 388, in save
File "pickle.py", line 578, in save
File "PyInstaller\loader\pyimod01_archive.py", line 76, in __getattr__
AssertionError
[4392] Failed to execute script 'main' due to unhandled exception!
Below is the code of my python script:
#==============================
# main.py
#==============================
from multiprocessing import freeze_support
from nnunet.inference.predict import predict_from_folder
if __name__ == "__main__":
freeze_support()
...
predict_from_folder(...)
...
Below is the code of the nnunet library that triggers the error:
#==============================
# nnunet\inference\predict.py
#==============================
def preprocess_multithreaded(trainer, list_of_lists, output_files, num_processes=2, segs_from_prev_stage=None):
if segs_from_prev_stage is None:
segs_from_prev_stage = [None] * len(list_of_lists)
num_processes = min(len(list_of_lists), num_processes)
classes = list(range(1, trainer.num_classes))
assert isinstance(trainer, nnUNetTrainer)
q = Queue(1)
processes = []
for i in range(num_processes):
pr = Process(
target=preprocess_save_to_queue,
args=(
trainer.preprocess_patient,
q,
list_of_lists[i::num_processes],
output_files[i::num_processes],
segs_from_prev_stage[i::num_processes],
classes,
trainer.plans['transpose_forward']
)
)
pr.start() ## <------------ The error is generated here!!!!!!!!!!!!!
processes.append(pr)
try:
end_ctr = 0
while end_ctr != num_processes:
item = q.get()
if item == "end":
end_ctr += 1
continue
else:
yield item
finally:
for p in processes:
if p.is_alive():
p.terminate()
p.join()
q.close()
def predict_cases_fastest(...):
...
pool = Pool(num_threads_nifti_save)
...
preprocessing = preprocess_multithreaded(
trainer,
list_of_lists,
cleaned_output_files,
num_threads_preprocessing,
segs_from_prev_stage
)
...
pool.starmap_async(...)
...
pool.close()
pool.join()
def predict_from_folder(...):
...
return predict_cases_fastest(...)
if __name__ == "__main__":
...
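For context, a minimal illustration of why pr.start() involves pickling at all (hypothetical worker function, not nnunet code): on Windows, multiprocessing uses the spawn start method, so the Process target and its args are pickled and sent to the child process, and freeze_support() is needed when the script is frozen.
from multiprocessing import Process, Queue, freeze_support

def worker(q, items):
    # stand-in for preprocess_save_to_queue: push results, then a sentinel
    for item in items:
        q.put(item * 2)
    q.put("end")

if __name__ == "__main__":
    freeze_support()  # no-op in a normal run, required in a PyInstaller .exe
    q = Queue(1)
    pr = Process(target=worker, args=(q, [1, 2, 3]))
    pr.start()  # on Windows (spawn), target and args are pickled here
    while True:
        item = q.get()
        if item == "end":
            break
        print(item)
    pr.join()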
Edit 03-02-2023
I have created a public project with which it is possible to reproduce the reported problem: https://gitlab.com/carlopoletto/nnunet_pyinstaller_problem
In the ./scripts folder there are some scripts to install everything and run the tests:
./scripts/install: installs the dependencies
./scripts/dist: creates the executable with PyInstaller
./scripts/run_py: runs the Python script (NB: this script automatically deletes the ./temp folder and recreates it by copying the contents of ./data)
./scripts/run_exe: runs the executable created with ./scripts/dist (NB: this script automatically deletes the ./temp folder and recreates it by copying the contents of ./data)
The problem appears to be internal to the nnunet library. I don't know if this problem can be solved by properly configuring pyinstaller.
I wrote code to load data from a CSV file into Google BigQuery, using Apache Beam for the pipeline.
This is the pipeline code:
list_of_data = open_file()
DistrictAllocationAndListStore_data = (p
| 'CreateDictData from DistrictAllocationAndListStore File' >> beam.Create(list_of_data)
| 'RenameDictKey DistrictAllocationAndListStore' >> beam.Map(rename_key)
| 'ChangeDataType DistrictAllocationAndListStore' >> beam.Map(convert_types_DistrictAllocationAndListStore)
| 'Write DistrictAllocationAndListStore' >> WriteToText('output/data-branchessap', '.txt')
)
# Write to BQ
DistrictAllocationAndListStore_data | 'Write to BQ DistrictAllocationAndListStore' >> beam.io.WriteToBigQuery(
table=table_id_tender,
dataset=dataset_id,
project=project_id,
schema=schema_tenders_master,
write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE,
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
# batch_size=int(100)
)
And this is the convert_types_DistrictAllocationAndListStore method:
def convert_types_DistrictAllocationAndListStore(data):
"""Converts string values to their appropriate type."""
date_format = '%Y-%m-%d'
data['site_code'] = str(data['site_code']) if 'site_code' in data else None
data['store_name'] = str(data['store_name']) if 'store_name' in data else None
data['city'] = str(data['city']) if 'city' in data else None
data['type'] = str(data['type']) if 'type' in data else None
data['region_no'] = str(data['region_no']) if 'region_no' in data else None
if data.get("opening_date") != "":
date = datetime.datetime.strptime(data.get("opening_date"), date_format)
data['opening_date'] = str(date.date())
data['opening_date_year'] = str(date.year)
data['opening_date_month'] = str(date.month)
data['opening_date_day'] = str(date.day)
data['opening_date_dayname'] = str(date.strftime("%A"))
data['opening_date_weeks'] = str(date.strftime("%W"))
else:
data['opening_date'] = None
data['opening_date_year'] = ""
data['opening_date_month'] = ""
data['opening_date_day'] = ""
data['opening_date_dayname'] = ""
data['opening_date_weeks'] = ""
return data
However, when I comment out the Write to BQ step and write to a local file instead (using the DirectRunner), the code runs successfully without error. But when I try to write to BQ (running with the Dataflow runner), I get this error:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py", line 284, in _execute
response = task()
File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py", line 357, in <lambda>
lambda: self.create_worker().do_instruction(request), request)
File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py", line 601, in do_instruction
return getattr(self, request_type)(
File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/sdk_worker.py", line 639, in process_bundle
bundle_processor.process_bundle(instruction_id))
File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py", line 993, in process_bundle
input_op_by_transform_id[element.transform_id].process_encoded(
File "/usr/local/lib/python3.8/site-packages/apache_beam/runners/worker/bundle_processor.py", line 222, in process_encoded
self.output(decoded_value)
File "apache_beam/runners/worker/operations.py", line 351, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 353, in apache_beam.runners.worker.operations.Operation.output
File "apache_beam/runners/worker/operations.py", line 215, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 712, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 713, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 1225, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 1290, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 1223, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 571, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 1386, in apache_beam.runners.common._OutputProcessor.process_outputs
File "apache_beam/runners/worker/operations.py", line 215, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 712, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 713, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 1225, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 1290, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 1223, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 571, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 1386, in apache_beam.runners.common._OutputProcessor.process_outputs
File "apache_beam/runners/worker/operations.py", line 215, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 712, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 713, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 1225, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 1290, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 1223, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 571, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 1386, in apache_beam.runners.common._OutputProcessor.process_outputs
File "apache_beam/runners/worker/operations.py", line 215, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 712, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 713, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 1225, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 1290, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 1223, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 571, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "apache_beam/runners/common.py", line 1386, in apache_beam.runners.common._OutputProcessor.process_outputs
File "apache_beam/runners/worker/operations.py", line 215, in apache_beam.runners.worker.operations.SingletonConsumerSet.receive
File "apache_beam/runners/worker/operations.py", line 712, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/worker/operations.py", line 713, in apache_beam.runners.worker.operations.DoOperation.process
File "apache_beam/runners/common.py", line 1225, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 1306, in apache_beam.runners.common.DoFnRunner._reraise_augmented
File "apache_beam/runners/common.py", line 1223, in apache_beam.runners.common.DoFnRunner.process
File "apache_beam/runners/common.py", line 572, in apache_beam.runners.common.SimpleInvoker.invoke_process
File "/media/arroganthooman/DATA/Fikri/UI/Magang/Script/venv/lib/python3.8/site-packages/apache_beam/transforms/core.py", line 1570, in <lambda>
wrapper = lambda x: [fn(x)]
File "sb-fin-district_allocation_list_store.py", line 104, in convert_types_DistrictAllocationAndListStore
NameError: name 'datetime' is not defined [while running 'ChangeDataType DistrictAllocationAndListStore-ptransform-570']
It seems datetime is not imported, but I have imported it at the top of my code. Any solution?
You can try to put import datetime within the function.
Did you import datetime at the top of your script?
Using the pipeline option --save_main_session=True resolved this error. No need to import within the function.
References: https://cloud.google.com/dataflow/docs/resources/faq#programming_with_the_cloud_dataflow_sdk_for_python
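For reference, a minimal sketch of setting that option in code rather than on the command line (save_main_session is part of Beam's SetupOptions; the surrounding names are illustrative):
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions, SetupOptions

pipeline_options = PipelineOptions()  # or PipelineOptions(pipeline_args)
# Pickle the main session so that module-level imports such as
# `import datetime` are available to DoFns running on remote Dataflow workers.
pipeline_options.view_as(SetupOptions).save_main_session = True

with beam.Pipeline(options=pipeline_options) as p:
    pass  # build the pipeline here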
Traceback (most recent call last):
File "C:\Users\DELL\anaconda3\lib\site-packages\tornado\web.py", line 1699, in _execute
result = await result
File "C:\Users\DELL\anaconda3\lib\site-packages\tornado\gen.py", line 742, in run
yielded = self.gen.throw(*exc_info) # type: ignore
File "C:\Users\DELL\anaconda3\lib\site-packages\notebook\services\sessions\handlers.py", line 72, in post
type=mtype))
File "C:\Users\DELL\anaconda3\lib\site-packages\tornado\gen.py", line 735, in run
value = future.result()
File "C:\Users\DELL\anaconda3\lib\site-packages\tornado\gen.py", line 742, in run
yielded = self.gen.throw(*exc_info) # type: ignore
File "C:\Users\DELL\anaconda3\lib\site-packages\notebook\services\sessions\sessionmanager.py", line 88, in create_session
kernel_id = yield self.start_kernel_for_session(session_id, path, name, type, kernel_name)
File "C:\Users\DELL\anaconda3\lib\site-packages\tornado\gen.py", line 735, in run
value = future.result()
File "C:\Users\DELL\anaconda3\lib\site-packages\tornado\gen.py", line 742, in run
yielded = self.gen.throw(*exc_info) # type: ignore
File "C:\Users\DELL\anaconda3\lib\site-packages\notebook\services\sessions\sessionmanager.py", line 101, in start_kernel_for_session
self.kernel_manager.start_kernel(path=kernel_path, kernel_name=kernel_name)
File "C:\Users\DELL\anaconda3\lib\site-packages\tornado\gen.py", line 735, in run
value = future.result()
File "C:\Users\DELL\anaconda3\lib\site-packages\tornado\gen.py", line 209, in wrapper
yielded = next(result)
File "C:\Users\DELL\anaconda3\lib\site-packages\notebook\services\kernels\kernelmanager.py", line 168, in start_kernel
super(MappingKernelManager, self).start_kernel(**kwargs)
File "C:\Users\DELL\anaconda3\lib\site-packages\jupyter_client\multikernelmanager.py", line 110, in start_kernel
km.start_kernel(**kwargs)
File "C:\Users\DELL\anaconda3\lib\site-packages\jupyter_client\manager.py", line 240, in start_kernel
self.write_connection_file()
File "C:\Users\DELL\anaconda3\lib\site-packages\jupyter_client\connect.py", line 476, in write_connection_file
kernel_name=self.kernel_name
File "C:\Users\DELL\anaconda3\lib\site-packages\jupyter_client\connect.py", line 141, in write_connection_file
with secure_write(fname) as f:
File "C:\Users\DELL\anaconda3\lib\contextlib.py", line 112, in __enter__
return next(self.gen)
File "C:\Users\DELL\anaconda3\lib\site-packages\jupyter_core\paths.py", line 424, in secure_write
win32_restrict_file_to_user(fname)
File "C:\Users\DELL\anaconda3\lib\site-packages\jupyter_core\paths.py", line 359, in win32_restrict_file_to_user
import win32api
ImportError: DLL load failed: %1 is not a valid Win32 application.
Check the permissions on that folder inside C:\Users\DELL\.
I always make sure to install the program folder closer to the root, e.g. C:\anaconda.
$ python -m spacy download en
3:05:52 Exception:
Traceback (most recent call last):
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/cli/base_command.py", line 143, in mainstatus = self.run(options, args)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/commands/install.py", line 318, in run
resolver.resolve(requirement_set)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/resolve.py", line 102, in resolve
self._resolve_one(requirement_set, req)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/resolve.py", line 256, in _resolve_one
abstract_dist = self._get_abstract_dist_for(req_to_install)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/resolve.py", line 209, in _get_abstract_dist_for
self.require_hashes
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/operations/prepare.py", line 283, in prepare_linked_requirement progress_bar=self.progress_bar
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/download.py", line 836, in unpack_url
progress_bar=progress_bar
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/download.py", line 673, in unpack_http_url
progress_bar)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/download.py", line 897, in _download_http_url
_download_url(resp, link, content_file, hashes, progress_bar)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/download.py", line 619, in _download_url
consume(downloaded_chunks)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/utils/misc.py", line 848, in consume
deque(iterator, maxlen=0)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/download.py", line 585, in written_chunks
for chunk in chunks:
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/utils/ui.py", line 159, in iter
for x in it:
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_internal/download.py", line 574, in resp_read
decode_content=False):
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_vendor/urllib3/response.py", line 465, in stream
data = self.read(amt=amt, decode_content=decode_content)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_vendor/urllib3/response.py", line 430, in read
raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
File "/usr/lib/python2.7/contextlib.py", line 35, in __exit__
self.gen.throw(type, value, traceback)
File "/home/yesha/.local/lib/python2.7/site-packages/pip/_vendor/urllib3/response.py", line 345, in _error_catcher
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
ReadTimeoutError: HTTPSConnectionPool(host='github-production-release-asset-2e65be.s3.amazonaws.com', port=443): Read timed out.
The server (PyPI) took too long to answer your client.
Check the configuration of pip in your home directory.
You can increase the timeout value:
[global]
timeout = 60
And retry later…