I'm working on the Titanic dataset with one-hot encoding applied to all columns, so every value is 0 or 1.
However, I get the following error during the train operation. I have tried many things but could not find the solution. I have tried to describe the error as fully as I can; I hope the problem is easy to follow. Thank you for your time.
Traceback (most recent call last):
File "titanic_classifier.py", line 277, in <module>
fire.Fire()
File "C:\Users\PC\Anaconda3\lib\site-packages\fire\core.py", line 138, in Fire
component_trace = _Fire(component, args, parsed_flag_args, context, name)
File "C:\Users\PC\Anaconda3\lib\site-packages\fire\core.py", line 468, in _Fire
target=component.__name__)
File "C:\Users\PC\Anaconda3\lib\site-packages\fire\core.py", line 672, in _CallAndUpdateTrace
component = fn(*varargs, **kwargs)
File "titanic_classifier.py", line 220, in train
n_sample = nsample if nsample is not None else m['n_sample'])
File "C:\Users\PC\Desktop\x\source\run_train.py", line 248, in run_train
dfXy, dfXytest = train(model_dict, dfXy, cols, post_process_fun)
File "C:\Users\PC\Desktop\x\source\run_train.py", line 125, in train
data_pars['train'] = {'Xtrain' : dfX[colsX].iloc[:itrain, :],
File "C:\Users\PC\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2908, in __getitem__
indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
File "C:\Users\PC\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 1254, in _get_listlike_indexer
self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
File "C:\Users\PC\Anaconda3\lib\site-packages\pandas\core\indexing.py", line 1304, in _validate_read_indexer
raise KeyError(f"{not_found} not in index")
KeyError: "['space_svd_0', 'interaction_svd_1', 'transit_svd_0', 'summary_svd_1', 'last_scraped_month', 'neighborhood_overview_svd_0', 'host_name_svd_0', 'house_rules_svd_1', 'house_rules_svd_0', 'first_review_month', 'transit_svd_1', 'last_review_year', 'interaction_svd_0', 'first_review_day', 'last_scraped_day', 'summary_svd_0', 'access_svd_1', 'first_review_year', 'notes_svd_0', 'name_svd_1', 'space_svd_1', 'neighborhood_overview_svd_1', 'host_since_year', 'amenities_svd_1', 'amenities_svd_0', 'host_about_svd_0', 'host_name_svd_1', 'notes_svd_1', 'name_svd_0', 'last_review_day', 'last_scraped_year', 'description_svd_0', 'host_since_day', 'last_review_month', 'access_svd_0', 'host_about_svd_1', 'host_since_month', 'description_svd_1'] not in index"
The part of run_train.py that the error comes from:
data_pars['train'] = {'Xtrain' : dfX[colsX].iloc[:itrain, :],
'ytrain' : dfX[coly].iloc[:itrain],
'Xtest' : dfX[colsX].iloc[itrain:ival, :],
'ytest' : dfX[coly].iloc[itrain:ival],
'Xval' : dfX[colsX].iloc[ival:, :],
'yval' : dfX[coly].iloc[ival:]
}
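Since the KeyError simply lists columns that are not present in the frame, a quick diagnostic is to diff the requested columns against the actual ones (a minimal sketch; colsX and dfX are the names used in run_train.py above):
# Hedged sketch: list the columns the pipeline requests that the
# one-hot-encoded frame does not actually contain.
missing = [c for c in colsX if c not in dfX.columns]
print(missing)  # should reproduce the list shown in the KeyError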
Error message
Traceback (most recent call last):
File "tools/train.py", line 244, in <module>
main()
File "tools/train.py", line 233, in main
train_detector(
File "/home/christ/dev/repos/railsight/mmdetection-2.25.3/mmdet/apis/train.py", line 244, in train_detector
runner.run(data_loaders, cfg.workflow)
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 130, in run
epoch_runner(data_loaders[i], **kwargs)
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/mmcv/runner/epoch_based_runner.py", line 47, in train
for i, data_batch in enumerate(self.data_loader):
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 368, in __iter__
return self._get_iterator()
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 314, in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 965, in __init__
self._reset(loader, first_iter=True)
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 996, in _reset
self._try_put_index()
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1230, in _try_put_index
index = self._next_index()
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 521, in _next_index
return next(self._sampler_iter) # may raise StopIteration
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/torch/utils/data/sampler.py", line 226, in __iter__
for idx in self.sampler:
File "/home/christ/dev/repos/railsight/mmdetection-2.25.3/mmdet/datasets/samplers/group_sampler.py", line 36, in __iter__
indices = np.concatenate(indices)
File "<__array_function__ internals>", line 180, in concatenate
ValueError: need at least one array to concatenate
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/multiprocessing/popen_fork.py", line 27, in poll
pid, sts = os.waitpid(self.pid, flag)
File "/home/christ/miniconda3/envs/mmdetection/lib/python3.8/site-packages/torch/utils/data/_utils/signal_handling.py", line 66, in handler
_error_if_any_worker_fails()
RuntimeError: DataLoader worker (pid 35413) is killed by signal: Terminated.
This is the error message I am faced with when trying to train SSD with MMDetection. I have checked through my dataset, and it trains fine with faster_rcnn, so I don't understand why I am having this issue with SSD training. Any advice would be great!
_base_ = '../ssd/ssd300_coco.py'
dataset_type = 'CocoDataset'
classes = ('pantograph',)
data = dict(
train=dict(
img_prefix='configs/pantograph/train/',
classes=classes,
ann_file='configs/pantograph/train/SSDTrain.json',
dataset=dict(
ann_file='configs/pantograph/train/SSDTrain.json',
img_prefix='configs/pantograph/train/')),
val=dict(
img_prefix='configs/pantograph/val/',
classes=classes,
ann_file='configs/pantograph/val/SSDVal.json'),
test=dict(
img_prefix='configs/pantograph/test/',
classes=classes,
ann_file='configs/pantograph/test/SSDTest.json'))
Above is the custom SSD config that I tried to run. I have double-checked the file locations and made sure they are all correct.
MMDetection has a very good community; you can try looking at this link to solve your problem. In short, you should change your classes = ('pantograph',) as in the following code:
cfg.metainfo = {
'classes': ('balloon', ),
'palette': [
(220, 20, 60),
]
}
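Note that the metainfo snippet above targets the MMDetection 3.x API. For the 2.25.3 setup in the question, the ssd300_coco.py base config wraps the training set in a RepeatDataset, so the classes likely need to be set on the inner dataset dict as well. A hedged sketch, not a verified config:
data = dict(
    train=dict(
        dataset=dict(  # the inner dataset of the RepeatDataset wrapper
            classes=('pantograph',),
            ann_file='configs/pantograph/train/SSDTrain.json',
            img_prefix='configs/pantograph/train/')))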
I'm trying to train StyleGAN2, but every time I start training I get this error. It's weird that it can't allocate 0 bytes.
Log:
Traceback (most recent call last):
File "train.py", line 561, in <module>
main()
File "train.py", line 553, in main
run_training(**vars(args))
File "train.py", line 416, in run_training
run_desc, training_options = setup_training_options(**hyperparam_options)
File "train.py", line 105, in setup_training_options
dataset_obj = dataset.load_dataset(**args.train_dataset_args) # try to load the data and see what comes out
File "C:\Temporary Software\stylegan2-ada\training\dataset.py", line 231, in load_dataset
mirror_augment=mirror_augment, repeat=repeat, shuffle=shuffle)
File "C:\Temporary Software\stylegan2-ada\training\dataset.py", line 114, in __init__
self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var')
File "C:\Temporary Software\stylegan2-ada\dnnlib\tflib\tfutil.py", line 234, in create_var_with_large_initial_value
zeros = tf.zeros(initial_value.shape, initial_value.dtype)
File "C:\Users\Andrew\anaconda3\envs\stylegan\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1871, in zeros
output = _constant_if_small(zero, shape, dtype, name)
File "C:\Users\Andrew\anaconda3\envs\stylegan\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1829, in _constant_if_small
return constant(value, shape=shape, dtype=dtype, name=name)
File "C:\Users\Andrew\anaconda3\envs\stylegan\lib\site-packages\tensorflow\python\framework\constant_op.py", line 246, in constant
allow_broadcast=True)
File "C:\Users\Andrew\anaconda3\envs\stylegan\lib\site-packages\tensorflow\python\framework\constant_op.py", line 284, in _constant_impl
allow_broadcast=allow_broadcast))
File "C:\Users\Andrew\anaconda3\envs\stylegan\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 464, in make_tensor_proto
nparray = np.empty(shape, dtype=np_dt)
numpy.core._exceptions.MemoryError: Unable to allocate 0 bytes for an array with shape (1073741824, 0) and data type float32
Is this a numpy bug, or am I doing something wrong? I'd really appreciate any help.
This was also reported in https://github.com/numpy/numpy/issues/16410, and fixed in https://github.com/numpy/numpy/pull/21477, which is probably included in numpy version 1.23.
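A quick way to check whether an environment predates that fix (a trivial sketch):
import numpy as np
# Versions below 1.23 are likely still affected by the zero-byte allocation bug above.
print(np.__version__)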
I am using the TensorFlow data API to try to do some rejection sampling for my unbalanced data set.
I have run the code on my personal computer and it seems to work as I expect; however, when I run the code on my university's cluster I get a type error that I can't seem to understand. I have tried recasting, and I get the same error.
I am still learning how to use this API, and I'm still not 100% clear on whether this is the best way to achieve what I want, so I also welcome any advice on how I implemented the rejection sampling (this could very well be the reason for the error, since I don't fully understand it yet).
This is how I am loading in the data to the dataset:
data = np.loadtxt("my_data.dat")
features = data[:, 1:10]
labels = data[:, 0]
labels[labels == -1] = 0
assert features.shape[0] == labels.shape[0]
dataset_size = len(features)
dataset = tf.data.Dataset.from_tensor_slices((features.astype('float32'),
labels.astype('int32')))
dataset = dataset.shuffle(buffer_size=dataset_size)
The error occurs when I run this part:
train_size = int((2/3.0)*dataset_size)
tr_dataset = dataset.take(train_size)
tr_dataset = (tr_dataset.apply(
tf.contrib.data.rejection_resample(
class_func=lambda _, c: c, target_dist=[0.5, 0.5],
seed=42)).map(lambda a, b: b)).batch(100)
This is the error:
Traceback (most recent call last):
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 510, in _apply_op_helper
preferred_dtype=default_dtype)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1094, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 931, in _TensorTensorConversionFunction
(dtype.name, t.dtype.name, str(t)))
ValueError: Tensor conversion requested dtype int32 for Tensor with dtype int64: 'Tensor("Sum:0", shape=(2,), dtype=int64)'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test.py", line 185, in <module>
seed=42))).batch(100)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1109, in apply
dataset = transformation_func(self)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/contrib/data/python/ops/resampling.py", line 74, in _apply_fn
target_dist_t, class_values_ds)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/contrib/data/python/ops/resampling.py", line 183, in _estimate_initial_dist_ds
update_estimate_and_tile))
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1109, in apply
dataset = transformation_func(self)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/contrib/data/python/ops/scan_ops.py", line 172, in _apply_fn
return _ScanDataset(dataset, initial_state, scan_func)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/contrib/data/python/ops/scan_ops.py", line 74, in __init__
add_to_graph=False)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1459, in __init__
self._function._create_definition_if_needed() # pylint: disable=protected-access
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/framework/function.py", line 337, in _create_definition_if_needed
self._create_definition_if_needed_impl()
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/framework/function.py", line 346, in _create_definition_if_needed_impl
self._capture_by_value, self._caller_device)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/framework/function.py", line 863, in func_graph_from_py_func
outputs = func(*func_graph.inputs)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/data/ops/dataset_ops.py", line 1392, in tf_data_structured_function_wrapper
ret = func(*nested_args)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/contrib/data/python/ops/resampling.py", line 176, in update_estimate_and_tile
c, num_examples_per_class_seen)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/contrib/data/python/ops/resampling.py", line 212, in _estimate_data_distribution
array_ops.one_hot(c, num_classes, dtype=dtypes.int64), 0))
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 297, in add
"Add", x=x, y=y, name=name)
File "/home/user/.conda/envs/tensorflowcpu/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 546, in _apply_op_helper
inferred_from[input_arg.type_attr]))
TypeError: Input 'y' of 'Add' Op has type int64 that does not match type int32 of argument 'x'.
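Based on that final TypeError (the resampler's internal class counter is int64 while the incoming label tensor is int32), one hedged workaround is to feed the labels in as int64 when building the dataset; this is a guess from the traceback, not a verified fix:
# Hedged sketch: cast labels to int64 so they match rejection_resample's
# internal int64 class counter.
dataset = tf.data.Dataset.from_tensor_slices((features.astype('float32'),
                                              labels.astype('int64')))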
Trying out the Python package tsfresh, I run into issues at the first steps. Given a series, how do I (automatically) make features for it? This snippet produces different errors depending on which part I try.
import tsfresh
import pandas as pd
import numpy as np
#tfX, tfy = tsfresh.utilities.dataframe_functions.make_forecasting_frame(pd.Series(np.random.randn(1000)/50), kind='float64', max_timeshift=50, rolling_direction=1)
#rf = tsfresh.extract_relevant_features(tfX, y=tfy, n_jobs=1, column_id='id')
tfX, tfy = tsfresh.utilities.dataframe_functions.make_forecasting_frame(pd.Series(np.random.randn(1000)/50), kind=1, max_timeshift=50, rolling_direction=1)
rf = tsfresh.extract_relevant_features(tfX, y=tfy, n_jobs=1, column_id='id')
The errors are in the first case
""" Traceback (most recent call last): File "C:\Users\user\Anaconda3\envs\env1\lib\multiprocessing\pool.py", line
119, in worker
result = (True, func(*args, **kwds)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 38, in _function_with_partly_reduce
results = list(itertools.chain.from_iterable(results)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 37, in
results = (map_function(chunk, **kwargs) for chunk in chunk_list) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 358, in _do_extraction_on_chunk
return list(_f()) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 350, in _f
result = [("", func(data))] File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 193, in variance_larger_than_standard_deviation
y = np.var(x) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\numpy\core\fromnumeric.py",
line 3157, in var
**kwargs) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\numpy\core_methods.py",
line 110, in _var
arrmean, rcount, out=arrmean, casting='unsafe', subok=False) TypeError: unsupported operand type(s) for /: 'str' and 'int' """
and in the second case
""" Traceback (most recent call last): File
"C:\Users\user\Anaconda3\envs\env1\lib\multiprocessing\pool.py", line
119, in worker
result = (True, func(*args, **kwds)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 38, in _function_with_partly_reduce
results = list(itertools.chain.from_iterable(results)) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\utilities\distribution.py",
line 37, in
results = (map_function(chunk, **kwargs) for chunk in chunk_list) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 358, in _do_extraction_on_chunk
return list(_f()) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\extraction.py",
line 345, in _f
result = func(data, param=parameter_list) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 1752, in friedrich_coefficients
coeff = _estimate_friedrich_coefficients(x, m, r) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\tsfresh\feature_extraction\feature_calculators.py",
line 145, in _estimate_friedrich_coefficients
result.dropna(inplace=True) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\frame.py",
line 4598, in dropna
result = self.loc(axis=axis)[mask] File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexing.py",
line 1500, in getitem
return self._getitem_axis(maybe_callable, axis=axis) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexing.py",
line 1859, in _getitem_axis
if is_iterator(key): File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\dtypes\inference.py",
line 157, in is_iterator
return hasattr(obj, 'next') File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\generic.py",
line 5065, in getattr
if self._info_axis._can_hold_identifiers_and_holds_name(name): File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\base.py",
line 3984, in _can_hold_identifiers_and_holds_name
return name in self File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\category.py",
line 327, in contains
return contains(self, key, container=self._engine) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\arrays\categorical.py",
line 188, in contains
loc = cat.categories.get_loc(key) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 770, in get_loc
start, stop = self._find_non_overlapping_monotonic_bounds(key) File
"C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 717, in _find_non_overlapping_monotonic_bounds
start = self._searchsorted_monotonic(key, 'left') File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\interval.py",
line 681, in _searchsorted_monotonic
return sub_idx._searchsorted_monotonic(label, side) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\indexes\base.py",
line 4755, in _searchsorted_monotonic
return self.searchsorted(label, side=side) File "C:\Users\user\Anaconda3\envs\env1\lib\site-packages\pandas\core\base.py",
line 1501, in searchsorted
return self._values.searchsorted(value, side=side, sorter=sorter) TypeError: Cannot cast array data from dtype('float64') to
dtype('
np.__version__ and tsfresh.__version__ are ('1.15.4', 'unknown'). I installed tsfresh using conda, probably from conda-forge. I am on Windows 10. Using another kernel with np.__version__, tsfresh.__version__ of ('1.15.4', '0.11.2') led to the same results.
Trying the first couple of cells from timeseries_forecasting_basic_example.ipynb yields the casting error as well.
Fixed it. Either the version on conda(-forge) or one of its dependencies was the issue. Running "conda uninstall tsfresh", then "conda install patsy future six tqdm", and finally "pip install tsfresh" did the trick.
I can't figure out what the problem is in the given code:
I am using dask to merge several dataframes. After merging, I want to find the unique values from one of the columns. I am getting a type error while converting from dask to pandas using unique().compute(), but I cannot seem to find what the problem actually is. It says that a str cannot be interpreted as an int, yet for some of the files the code passes through and for some it doesn't. I also cannot find a problem with the data structure.
Any suggestions?
import pandas as pd
import dask.dataframe as dd
# Everything is fine until merging
# I have put several print(markers) to find the problem code
print('dask cols')
print(df_by_dask_merged.columns)
print()
print(dask_cols)
print()
print('find unique contigs values in dask dataframe')
pd_df = df_by_dask_merged['contig']
print(pd_df)
print()
print('mark 02')
# this is the problem code ??
pd_df_contig = pd_df.unique().compute()
print(pd_df_contig)
print('mark 03')
Output on Terminal:
dask cols
Index(['contig', 'pos', 'ref', 'all-alleles', 'ms01e_PI', 'ms01e_PG_al',
'ms02g_PI', 'ms02g_PG_al', 'all-freq'],
dtype='object')
['contig', 'pos', 'ref', 'all-alleles', 'ms01e_PI', 'ms01e_PG_al', 'ms02g_PI', 'ms02g_PG_al', 'all-freq']
find unique contigs values in dask dataframe
Dask Series Structure:
npartitions=1
int64
...
Name: contig, dtype: int64
Dask Name: getitem, 52 tasks
mark 02
Traceback (most recent call last):
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/indexes/base.py", line 2145, in get_value
return tslib.get_value_box(s, key)
File "pandas/tslib.pyx", line 880, in pandas.tslib.get_value_box (pandas/tslib.c:17368)
File "pandas/tslib.pyx", line 889, in pandas.tslib.get_value_box (pandas/tslib.c:17042)
TypeError: 'str' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "merge_haplotype.py", line 305, in <module>
main()
File "merge_haplotype.py", line 152, in main
pd_df_contig = pd_df.unique().compute()
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/base.py", line 155, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/base.py", line 404, in compute
results = get(dsk, keys, **kwargs)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/threaded.py", line 75, in get
pack_exception=pack_exception, **kwargs)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/local.py", line 521, in get_async
raise_exception(exc, tb)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/compatibility.py", line 67, in reraise
raise exc
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/local.py", line 290, in execute_task
result = _execute_task(task, data)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/local.py", line 271, in _execute_task
return func(*args2)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/dataframe/core.py", line 3404, in apply_and_enforce
df = func(*args, **kwargs)
File "/home/everestial007/anaconda3/lib/python3.5/site-packages/dask/utils.py", line 687, in __call__
return getattr(obj, self.method)(*args, **kwargs)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/core/frame.py", line 4133, in apply
return self._apply_standard(f, axis, reduce=reduce)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/core/frame.py", line 4229, in _apply_standard
results[i] = func(v)
File "merge_haplotype.py", line 249, in <lambda>
apply(lambda row : update_cols(row, sample_name), axis=1, meta=(int))
File "merge_haplotype.py", line 278, in update_cols
if 'N|N' in df_by_dask[sample_name + '_PG_al']:
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/core/series.py", line 601, in __getitem__
result = self.index.get_value(self, key)
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/indexes/base.py", line 2153, in get_value
raise e1
File "/home/everestial007/.local/lib/python3.5/site-packages/pandas/indexes/base.py", line 2139, in get_value
tz=getattr(series.dtype, 'tz', None))
File "pandas/index.pyx", line 105, in pandas.index.IndexEngine.get_value (pandas/index.c:3338)
File "pandas/index.pyx", line 113, in pandas.index.IndexEngine.get_value (pandas/index.c:3041)
File "pandas/index.pyx", line 161, in pandas.index.IndexEngine.get_loc (pandas/index.c:4024)
File "pandas/src/hashtable_class_helper.pxi", line 732, in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13161)
File "pandas/src/hashtable_class_helper.pxi", line 740, in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:13115)
KeyError: ('ms02g_PG_al', 'occurred at index 0')
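One observation that may help (a guess from the traceback, not a verified diagnosis): dask is lazy, so unique().compute() is merely where the graph executes; the exception is actually raised inside the earlier apply(lambda row: update_cols(row, sample_name), axis=1, meta=(int)) from merge_haplotype.py line 249. A minimal sketch for verifying that the frame fed to that apply really contains the column update_cols indexes (df_by_dask and sample_name are names from merge_haplotype.py; kept Python 3.5 compatible to match the traceback paths):
# Hedged check before the row-wise apply: make sure the column exists
# on the dask frame at that point in the pipeline.
col = sample_name + '_PG_al'
assert col in df_by_dask.columns, col + ' missing before apply(axis=1)'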