I am using TuriCreate to create a model that classifies human activity, but I get an error when I run the activity_classifier.create(...) method.
Code
This is what I did:
Load all data:
train_sf = tc.SFrame("data/cleaned_train_sframe")
valid_sf = tc.SFrame("data/cleaned_valid_sframe")
test_sf = tc.SFrame("data/cleaned_test_sframe")
Divide the training SFrame randomly into two smaller SFrames:
train, valid = tc.activity_classifier.util.random_split_by_session(train_sf, session_id='sessionId', fraction=0.9)
Build and train the model:
model = tc.activity_classifier.create(dataset=train_sf,
                                      session_id='sessionId',
                                      target='activity',
                                      features=["rotX", "rotY", "rotZ", "accelX", "accelY", "accelZ"],
                                      prediction_window=50,
                                      validation_set=valid_sf,
                                      max_iterations=20)
Error
The third step raises the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [34], in <cell line: 1>()
----> 1 model = tc.activity_classifier.create(dataset=train_sf,
2 session_id='sessionId',
3 target='activity',
4 features=["rotX", "rotY", "rotZ", "accelX", "accelY", "accelZ"],
5 prediction_window=50,
6 validation_set=valid_sf,
7 max_iterations=20)
File ~/Desktop/PFG/lib/python3.8/site-packages/turicreate/toolkits/activity_classifier/_activity_classifier.py:200, in create(dataset, session_id, target, features, prediction_window, validation_set, max_iterations, batch_size, verbose, random_seed)
197 options["_show_loss"] = False
198 options["random_seed"] = random_seed
--> 200 model.train(dataset, target, session_id, validation_set, options)
201 return ActivityClassifier(model_proxy=model, name=name)
File ~/Desktop/PFG/lib/python3.8/site-packages/turicreate/extensions.py:305, in _ToolkitClass.__getattr__.<locals>.<lambda>(*args, **kwargs)
302 return _wrap_function_return(self._tkclass.get_property(name))
303 elif name in self._functions:
304 # is it a function?
--> 305 ret = lambda *args, **kwargs: self.__run_class_function(name, args, kwargs)
306 ret.__doc__ = (
307 "Name: " + name + "\nParameters: " + str(self._functions[name]) + "\n"
308 )
309 try:
File ~/Desktop/PFG/lib/python3.8/site-packages/turicreate/extensions.py:290, in _ToolkitClass.__run_class_function(self, fnname, args, kwargs)
288 # unwrap it
289 try:
--> 290 ret = self._tkclass.call_function(fnname, argument_dict)
291 except RuntimeError as exc:
292 # Expose C++ exceptions using ToolkitError.
293 raise _ToolkitError(exc)
File cy_model.pyx:35, in turicreate._cython.cy_model.UnityModel.call_function()
File cy_model.pyx:40, in turicreate._cython.cy_model.UnityModel.call_function()
ValueError: stod: no conversion
Does anyone know what the problem could be?
You can get past this issue by setting validation_set to None.
This does mean that you have no validation, but at least you can create your model.
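For reference, here is the create() call from the question with only that change applied (a minimal sketch; all other arguments are kept exactly as in the question):
model = tc.activity_classifier.create(dataset=train_sf,
                                      session_id='sessionId',
                                      target='activity',
                                      features=["rotX", "rotY", "rotZ", "accelX", "accelY", "accelZ"],
                                      prediction_window=50,
                                      validation_set=None,  # disables validation, avoiding the stod error
                                      max_iterations=20)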
Related
I am trying to create a feature extractor using create_feature_extractor from torchvision.models.feature_extraction.
The model I am trying to use is from the vit_pytorch library (link: https://github.com/lucidrains/vit-pytorch). The problem occurs when I create a model from this library:
from vit_pytorch import ViT
from torchvision.models.feature_extraction import create_feature_extractor
model = ViT(image_size=28,
            patch_size=7,
            num_classes=10,
            dim=16,
            depth=6,
            heads=16,
            mlp_dim=256,
            dropout=0.1,
            emb_dropout=0.1,
            channels=1)

random_layer_name = 'transformer.layers.1.1.fn.net.4'
feature_extractor = create_feature_extractor(model,
                                             return_nodes=[random_layer_name])
and then call create_feature_extractor() on it; I always get this error:
RuntimeError Traceback (most recent call last)
Cell In[17], line 2
1 # torch.fx.wrap('len')
----> 2 feature_extractor = create_feature_extractor(model,
3 return_nodes=['transformer.layers.1.1.fn.net.4'])
File ~/Mokslas/AI/venv/lib/python3.10/site-packages/torchvision/models/feature_extraction.py:485, in create_feature_extractor(model, return_nodes, train_return_nodes, eval_return_nodes, tracer_kwargs, suppress_diff_warning)
483 # Instantiate our NodePathTracer and use that to trace the model
484 tracer = NodePathTracer(**tracer_kwargs)
--> 485 graph = tracer.trace(model)
487 name = model.__class__.__name__ if isinstance(model, nn.Module) else model.__name__
488 graph_module = fx.GraphModule(tracer.root, graph, name)
File ~/Mokslas/AI/venv/lib/python3.10/site-packages/torch/fx/_symbolic_trace.py:756, in Tracer.trace(self, root, concrete_args)
749 for module in self._autowrap_search:
750 _autowrap_check(
751 patcher, module.__dict__, self._autowrap_function_ids
752 )
753 self.create_node(
754 "output",
755 "output",
--> 756 (self.create_arg(fn(*args)),),
757 {},
758 type_expr=fn.__annotations__.get("return", None),
759 )
761 self.submodule_paths = None
762 finally:
File ~/Mokslas/AI/venv/lib/python3.10/site-packages/vit_pytorch/vit.py:115, in ViT.forward(self, img)
114 def forward(self, img):
--> 115 x = self.to_patch_embedding(img)
116 b, n, _ = x.shape
118 cls_tokens = repeat(self.cls_token, '1 1 d -> b 1 d', b = b)
File ~/Mokslas/AI/venv/lib/python3.10/site-packages/torch/fx/_symbolic_trace.py:734, in Tracer.trace.<locals>.module_call_wrapper(mod, *args, **kwargs)
727 return _orig_module_call(mod, *args, **kwargs)
729 _autowrap_check(
730 patcher,
731 getattr(getattr(mod, "forward", mod), "__globals__", {}),
732 self._autowrap_function_ids,
733 )
--> 734 return self.call_module(mod, forward, args, kwargs)
File ~/Mokslas/AI/venv/lib/python3.10/site-packages/torchvision/models/feature_extraction.py:83, in NodePathTracer.call_module(self, m, forward, args, kwargs)
...
--> 396 raise RuntimeError("'len' is not supported in symbolic tracing by default. If you want "
397 "this call to be recorded, please call torch.fx.wrap('len') at "
398 "module scope")
RuntimeError: 'len' is not supported in symbolic tracing by default. If you want this call to be recorded, please call torch.fx.wrap('len') at module scope
It doesn't matter which model I choose from that library, or which layer or layers I choose to output; I always get the same error.
I have tried adding torch.fx.wrap('len'), but the same problem persisted. I know I could try to solve it with the hook methods, but is there a way to solve this problem so that I can still use the create_feature_extractor() functionality?
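For completeness, here is a minimal sketch of the hook-based workaround mentioned above, using a plain PyTorch forward hook instead of create_feature_extractor(). It assumes the same ViT model as in the question; the layer path is copied from the question and depends on the installed vit_pytorch version, so adjust it to whatever model.named_modules() reports:
import torch
from vit_pytorch import ViT

model = ViT(image_size=28, patch_size=7, num_classes=10, dim=16,
            depth=6, heads=16, mlp_dim=256, dropout=0.1,
            emb_dropout=0.1, channels=1)

features = {}

def save_output(name):
    # Returns a forward hook that stores the module's output under `name`.
    def hook(module, inputs, output):
        features[name] = output.detach()
    return hook

layer_name = 'transformer.layers.1.1.fn.net.4'  # taken from the question
layer = dict(model.named_modules())[layer_name]
handle = layer.register_forward_hook(save_output(layer_name))

with torch.no_grad():
    model(torch.randn(1, 1, 28, 28))  # one 1-channel 28x28 image

handle.remove()
print(features[layer_name].shape)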
I just started using Ray to handle my data.
I tried to run the code below,
import ray
ray.init(num_cpus = 16, ignore_reinit_error=True, object_store_memory = 10**10)
@ray.remote
def PROB_SCORES(words):
    encoded_tweet = tokenizer(words, return_tensors='pt')
    output = model(**encoded_tweet)
    # convert the tensor into probabilities
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    prob_scores = scores[0] * (-1) + scores[-1] * 1  # neg to -1, pos to 1
    return prob_scores
ray.get([PROB_SCORES.remote(Tweets['Content'])]) # 'Tweets' is the dataframe which contains data.
However, I get an error message like this:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
c:\Users\HERO\OneDrive\Github\[i-MAES]\[TextAnalysis]\TextAnalysis\NLTK_STUDY\2. Senti.ipynb Cell 11' in <cell line: 1>()
----> 1 ray.get([PROB_SCORES.remote(S8['Content'])])
File c:\Users\HERO\anaconda3\envs\text\lib\site-packages\ray\remote_function.py:111, in RemoteFunction.__init__.<locals>._remote_proxy(*args, **kwargs)
109 @wraps(function)
110 def _remote_proxy(*args, **kwargs):
--> 111 return self._remote(args=args, kwargs=kwargs, **self._default_options)
File c:\Users\HERO\anaconda3\envs\text\lib\site-packages\ray\util\tracing\tracing_helper.py:303, in _tracing_task_invocation.<locals>._invocation_remote_span(self, args, kwargs, *_args, **_kwargs)
301 if kwargs is not None:
302 assert "_ray_trace_ctx" not in kwargs
--> 303 return method(self, args, kwargs, *_args, **_kwargs)
305 assert "_ray_trace_ctx" not in kwargs
307 tracer = _opentelemetry.trace.get_tracer(__name__)
File c:\Users\HERO\anaconda3\envs\text\lib\site-packages\ray\remote_function.py:213, in RemoteFunction._remote(self, args, kwargs, **task_options)
210 raise TypeError(msg) from e
212 self._last_export_session_and_job = worker.current_session_and_job
--> 213 worker.function_actor_manager.export(self)
215 kwargs = {} if kwargs is None else kwargs
216 args = [] if args is None else args
File c:\Users\HERO\anaconda3\envs\text\lib\site-packages\ray\_private\function_manager.py:205, in FunctionActorManager.export(self, remote_function)
...
653 ray_constants.FUNCTION_SIZE_ERROR_THRESHOLD // (1024 * 1024),
654 )
--> 655 raise ValueError(error)
ValueError: The remote function __main__.PROB_SCORES is too large (476 MiB > FUNCTION_SIZE_ERROR_THRESHOLD=95 MiB). Check that its definition is not implicitly capturing a large array or other object in scope. Tip: use ray.put() to put large objects in the Ray object store.
I have no idea why this problem is happening; the 'Tweets' dataframe only takes up about 5 MB of memory.
Any help would be a lifesaver.
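The error text itself points at the likely cause: PROB_SCORES implicitly captures model (and tokenizer) from the enclosing scope, so Ray serializes hundreds of MiB along with the function even though the dataframe is small. Below is a minimal sketch of the ray.put() tip from the error message, assuming the same model, tokenizer, softmax and Tweets objects as in the question:
import ray

ray.init(num_cpus=16, ignore_reinit_error=True, object_store_memory=10**10)

# Put the large objects into the Ray object store once, instead of
# capturing them inside the remote function's closure.
model_ref = ray.put(model)
tokenizer_ref = ray.put(tokenizer)

@ray.remote
def PROB_SCORES(model, tokenizer, words):
    encoded_tweet = tokenizer(words, return_tensors='pt')
    output = model(**encoded_tweet)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    return scores[0] * (-1) + scores[-1] * 1  # neg to -1, pos to 1

# Object references passed as arguments are dereferenced on the worker.
result = ray.get(PROB_SCORES.remote(model_ref, tokenizer_ref, Tweets['Content']))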
I'm trying to download the MNIST dataset from OpenML using the openml library.
I'm working in Jupyter notebooks because I don't want to download the same dataset every time.
Problem is, after running the following code, I get an error:
from openml.datasets import get_dataset
mnist = get_dataset(554)
x, y, p, q = mnist.get_data(
dataset_format="dataframe", target=mnist.default_target_attribute
)
I'm pasting the whole error message I get below; the problem occurs when I try to assign the result of .get_data() to x, y, p and q.
The environment I'm running this in is called Oceanic.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
File ~\anaconda3\envs\Oceanic\lib\site-packages\openml\datasets\dataset.py:491, in OpenMLDataset._cache_compressed_file_from_file(self, data_file)
490 try:
--> 491 data = pd.read_parquet(data_file)
492 except Exception as e:
File ~\anaconda3\envs\Oceanic\lib\site-packages\pandas\io\parquet.py:493, in read_parquet(path, engine, columns, storage_options, use_nullable_dtypes, **kwargs)
491 impl = get_engine(engine)
--> 493 return impl.read(
494 path,
495 columns=columns,
496 storage_options=storage_options,
497 use_nullable_dtypes=use_nullable_dtypes,
498 **kwargs,
499 )
File ~\anaconda3\envs\Oceanic\lib\site-packages\pandas\io\parquet.py:240, in PyArrowImpl.read(self, path, columns, use_nullable_dtypes, storage_options, **kwargs)
239 try:
--> 240 result = self.api.parquet.read_table(
241 path_or_handle, columns=columns, **kwargs
242 ).to_pandas(**to_pandas_kwargs)
243 if manager == "array":
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\parquet.py:1731, in read_table(source, columns, use_threads, metadata, use_pandas_metadata, memory_map, read_dictionary, filesystem, filters, buffer_size, partitioning, use_legacy_dataset, ignore_prefixes)
1727 dataset = ParquetFile(
1728 source, metadata=metadata, read_dictionary=read_dictionary,
1729 memory_map=memory_map, buffer_size=buffer_size)
-> 1731 return dataset.read(columns=columns, use_threads=use_threads,
1732 use_pandas_metadata=use_pandas_metadata)
1734 if ignore_prefixes is not None:
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\parquet.py:1608, in _ParquetDatasetV2.read(self, columns, use_threads, use_pandas_metadata)
1606 use_threads = False
-> 1608 table = self._dataset.to_table(
1609 columns=columns, filter=self._filter_expression,
1610 use_threads=use_threads
1611 )
1613 # if use_pandas_metadata, restore the pandas metadata (which gets
1614 # lost if doing a specific `columns` selection in to_table)
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\_dataset.pyx:458, in pyarrow._dataset.Dataset.to_table()
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\_dataset.pyx:2889, in pyarrow._dataset.Scanner.to_table()
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\error.pxi:141, in pyarrow.lib.pyarrow_internal_check_status()
File ~\anaconda3\envs\Oceanic\lib\site-packages\pyarrow\error.pxi:112, in pyarrow.lib.check_status()
OSError: NotImplemented: Support for codec 'snappy' not built
The above exception was the direct cause of the following exception:
Exception Traceback (most recent call last)
Input In [10], in <cell line: 1>()
----> 1 x, y, p, q = mnist.get_data(
2 dataset_format="dataframe", target=mnist.default_target_attribute
3 )
File ~\anaconda3\envs\Oceanic\lib\site-packages\openml\datasets\dataset.py:698, in OpenMLDataset.get_data(self, target, include_row_id, include_ignore_attribute, dataset_format)
658 def get_data(
659 self,
660 target: Optional[Union[List[str], str]] = None,
(...)
668 List[str],
669 ]:
670 """ Returns dataset content as dataframes or sparse matrices.
671
672 Parameters
(...)
696 List of attribute names.
697 """
--> 698 data, categorical, attribute_names = self._load_data()
700 to_exclude = []
701 if not include_row_id and self.row_id_attribute is not None:
File ~\anaconda3\envs\Oceanic\lib\site-packages\openml\datasets\dataset.py:531, in OpenMLDataset._load_data(self)
528 self._download_data()
530 file_to_load = self.data_file if self.parquet_file is None else self.parquet_file
--> 531 return self._cache_compressed_file_from_file(file_to_load)
533 # helper variable to help identify where errors occur
534 fpath = self.data_feather_file if self.cache_format == "feather" else self.data_pickle_file
File ~\anaconda3\envs\Oceanic\lib\site-packages\openml\datasets\dataset.py:493, in OpenMLDataset._cache_compressed_file_from_file(self, data_file)
491 data = pd.read_parquet(data_file)
492 except Exception as e:
--> 493 raise Exception(f"File: {data_file}") from e
495 categorical = [data[c].dtype.name == "category" for c in data.columns]
496 attribute_names = list(data.columns)
Exception: File: C:\Users\Irving\.openml\org\openml\www\datasets\554\dataset.pq
Now, I've run the same code in PyCharm and it works just fine: the dataframes are assigned correctly and displayed. I've got no idea why this isn't working in Jupyter, and I would like to know why, because I prefer working with Jupyter notebooks.
Any help is appreciated, thanks in advance.
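Given that the OSError complains "Support for codec 'snappy' not built", one thing worth checking is whether PyCharm and the Jupyter kernel are actually using the same environment and the same pyarrow build. A quick diagnostic sketch (assuming pyarrow >= 4, which provides Codec.is_available):
import sys
import pyarrow as pa

print(sys.executable)                   # which Python the kernel is running
print(pa.__version__)
print(pa.Codec.is_available('snappy'))  # False would explain the OSError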
The example is fully reproducible. Here is the full notebook (which downloads the data too): https://github.com/ageron/handson-ml2/blob/master/02_end_to_end_machine_learning_project.ipynb
After this part of the notebook:
full_pipeline_with_predictor = Pipeline([
    ("preparation", full_pipeline),
    ("linear", LinearRegression())
])
full_pipeline_with_predictor.fit(housing, housing_labels)
full_pipeline_with_predictor.predict(some_data)
I am trying to get predictions on the test set with this code:
X_test_prepared = full_pipeline.transform(X_test)
final_predictions = full_pipeline_with_predictor.predict(X_test_prepared)
But I am receiving this error:
C:\Users\Alex\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\compose\_column_transformer.py:430: FutureWarning: Given feature/column names or counts do not match the ones for the data given during fit. This will fail from v0.24.
FutureWarning)
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
796 try:
--> 797 tasks = self._ready_batches.get(block=False)
798 except queue.Empty:
~\AppData\Local\Continuum\anaconda3\lib\queue.py in get(self, block, timeout)
166 if not self._qsize():
--> 167 raise Empty
168 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-141-dc87b1c9e658> in <module>
5
6 X_test_prepared = full_pipeline.transform(X_test)
----> 7 final_predictions = full_pipeline_with_predictor.predict(X_test_prepared)
8
9 final_mse = mean_squared_error(y_test, final_predictions)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\utils\metaestimators.py in <lambda>(*args, **kwargs)
114
115 # lambda, but not partial, allows help() to work with update_wrapper
--> 116 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
117 # update the docstring of the returned function
118 update_wrapper(out, self.fn)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\pipeline.py in predict(self, X, **predict_params)
417 Xt = X
418 for _, name, transform in self._iter(with_final=False):
--> 419 Xt = transform.transform(Xt)
420 return self.steps[-1][-1].predict(Xt, **predict_params)
421
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\compose\_column_transformer.py in transform(self, X)
586
587 self._validate_features(X.shape[1], X_feature_names)
--> 588 Xs = self._fit_transform(X, None, _transform_one, fitted=True)
589 self._validate_output(Xs)
590
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\compose\_column_transformer.py in _fit_transform(self, X, y, func, fitted)
455 message=self._log_message(name, idx, len(transformers)))
456 for idx, (name, trans, column, weight) in enumerate(
--> 457 self._iter(fitted=fitted, replace_strings=True), 1))
458 except ValueError as e:
459 if "Expected 2D array, got 1D array instead" in str(e):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1002 # remaining jobs.
1003 self._iterating = False
-> 1004 if self.dispatch_one_batch(iterator):
1005 self._iterating = self._original_iterator is not None
1006
~\AppData\Local\Continuum\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
806 big_batch_size = batch_size * n_jobs
807
--> 808 islice = list(itertools.islice(iterator, big_batch_size))
809 if len(islice) == 0:
810 return False
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\compose\_column_transformer.py in <genexpr>(.0)
454 message_clsname='ColumnTransformer',
455 message=self._log_message(name, idx, len(transformers)))
--> 456 for idx, (name, trans, column, weight) in enumerate(
457 self._iter(fitted=fitted, replace_strings=True), 1))
458 except ValueError as e:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\utils\__init__.py in _safe_indexing(X, indices, axis)
404 if axis == 1 and indices_dtype == 'str' and not hasattr(X, 'loc'):
405 raise ValueError(
--> 406 "Specifying the columns using strings is only supported for "
407 "pandas DataFrames"
408 )
ValueError: Specifying the columns using strings is only supported for pandas DataFrames
Question: How can I correct that error? And why does that error happen?
Since your final pipeline:
full_pipeline_with_predictor = Pipeline([
    ("preparation", full_pipeline),
    ("linear", LinearRegression())
])
clearly already contains full_pipeline, you should not "prepare" your X_test again; doing so, you "prepare" X_test twice, which is wrong. So your code should simply be
final_predictions = full_pipeline_with_predictor.predict(X_test)
exactly as it is for getting predictions for some_data, i.e.
full_pipeline_with_predictor.predict(some_data)
where you correctly do not "prepare" some_data before feeding it into the final pipeline.
The whole point of using pipelines is exactly this: to avoid running fit and predict separately for what may be several preparation steps, by wrapping all of them into a single pipeline instead. You correctly apply this process when predicting some_data, but you seem to have forgotten it in the next step, when trying to predict X_test.
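Putting it together, the test-set evaluation then becomes (a minimal sketch, reusing the y_test and mean_squared_error names already visible in the question's traceback):
from sklearn.metrics import mean_squared_error
import numpy as np

# Pass the raw X_test; the "preparation" step runs inside the pipeline.
final_predictions = full_pipeline_with_predictor.predict(X_test)
final_mse = mean_squared_error(y_test, final_predictions)
final_rmse = np.sqrt(final_mse)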
I would like to add additional augmentation this way:
additional_aug = [zoom_crop(scale=(0.75, 1.25), do_rand=False),
                  brightness(),
                  contrast()]

tfms = get_transforms(do_flip=True, flip_vert=True, max_lighting=0.2, xtra_tfms=additional_aug)

data = (ImageList.from_df(df=df, path='./', cols='path')
        .split_by_rand_pct(0.2)
        .label_from_df(cols='diagnosis', label_cls=FloatList)
        .transform(tfms, size=sz, resize_method=ResizeMethod.SQUISH, padding_mode='zeros')
        .databunch(bs=bs, num_workers=4)
        .normalize(imagenet_stats))
But I get errors:
--------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/fastai/data_block.py in _check_kwargs(ds, tfms, **kwargs)
590 x = ds[0]
--> 591 try: x.apply_tfms(tfms, **kwargs)
592 except Exception as e:
/opt/conda/lib/python3.6/site-packages/fastai/vision/image.py in apply_tfms(self, tfms, do_resolve, xtra, size, resize_method, mult, padding_mode, mode, remove_out)
105 if resize_method <= 2 and size is not None: tfms = self._maybe_add_crop_pad(tfms)
--> 106 tfms = sorted(tfms, key=lambda o: o.tfm.order)
107 if do_resolve: _resolve_tfms(tfms)
/opt/conda/lib/python3.6/site-packages/fastai/vision/image.py in <lambda>(o)
105 if resize_method <= 2 and size is not None: tfms = self._maybe_add_crop_pad(tfms)
--> 106 tfms = sorted(tfms, key=lambda o: o.tfm.order)
107 if do_resolve: _resolve_tfms(tfms)
AttributeError: 'list' object has no attribute 'tfm'
During handling of the above exception, another exception occurred:
Exception Traceback (most recent call last)
<ipython-input-27-3daf86c69a96> in <module>
2 .split_by_rand_pct(0.2)
3 .label_from_df(cols='diagnosis',label_cls=FloatList)
----> 4 .transform(tfms,size=sz,resize_method=ResizeMethod.SQUISH,padding_mode='zeros')
5 .databunch(bs=bs,num_workers=4)
6 .normalize(imagenet_stats)
/opt/conda/lib/python3.6/site-packages/fastai/data_block.py in transform(self, tfms, **kwargs)
500 if not tfms: tfms=(None,None)
501 assert is_listy(tfms) and len(tfms) == 2, "Please pass a list of two lists of transforms (train and valid)."
--> 502 self.train.transform(tfms[0], **kwargs)
503 self.valid.transform(tfms[1], **kwargs)
504 if self.test: self.test.transform(tfms[1], **kwargs)
/opt/conda/lib/python3.6/site-packages/fastai/data_block.py in transform(self, tfms, tfm_y, **kwargs)
719 def transform(self, tfms:TfmList, tfm_y:bool=None, **kwargs):
720 "Set the `tfms` and `tfm_y` value to be applied to the inputs and targets."
--> 721 _check_kwargs(self.x, tfms, **kwargs)
722 if tfm_y is None: tfm_y = self.tfm_y
723 tfms_y = None if tfms is None else list(filter(lambda t: getattr(t, 'use_on_y', True), listify(tfms)))
/opt/conda/lib/python3.6/site-packages/fastai/data_block.py in _check_kwargs(ds, tfms, **kwargs)
591 try: x.apply_tfms(tfms, **kwargs)
592 except Exception as e:
--> 593 raise Exception(f"It's not possible to apply those transforms to your dataset:\n {e}")
594
595 class LabelList(Dataset):
Exception: It's not possible to apply those transforms to your dataset:
'list' object has no attribute 'tfm'
According to the documentation: xtra_tfms: Optional[Collection[Transform]] = None → Collection[Transform]
How do I get it to work?
I have faced this problem and the solution is very simple: wrap each transform function you want to apply in its own list, and pass that list of lists to the xtra_tfms parameter of get_transforms. (It could even be a tuple of tuples, or any collection.)
additional_aug = [[zoom_crop(scale=(0.75, 1.25), do_rand=False)],
                  [brightness()],
                  [contrast()]]

tfms = get_transforms(do_flip=True,
                      flip_vert=True,
                      max_lighting=0.2,
                      xtra_tfms=additional_aug)
Hope this solves your problem.