Python load .features file - python

I have a dataset (the Kvasir dataset) with several files that contain the extracted features in the following format:
JCD:3.0,3.5,6.0...
Tamura:3.608455882352941,6.681779104634786,632.0,130.0...
ColorLayout:11.0,25.0,9.0,4.0,16.0...
EdgeHistogram:0.0,0.0,4.0,0.0,1.0,1.0,4.0...
AutoColorCorrelogram:13.0,13.0,12.0,12.0,13.0,13.0,12.0...
I'm trying to load all the features files with this code:
dat=sklearn.datasets.load_files("/MTU/Q3/kvasir-dataset-v2-features")
df=pd.DataFrame(data=dat.data,columns=dat.feature_names)
but I get this error:
KeyError Traceback (most recent call last)
C:\Python310\lib\site-packages\sklearn\utils\__init__.py in __getattr__(self, key)
116 try:
--> 117 return self[key]
118 except KeyError:
KeyError: 'feature_names'
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_22320/2380416560.py in <module>
----> 1 df=pd.DataFrame(data=dat.data,columns=dat.feature_names)
C:\Python310\lib\site-packages\sklearn\utils\__init__.py in __getattr__(self, key)
117 return self[key]
118 except KeyError:
--> 119 raise AttributeError(key)
120
121 def __setstate__(self, state):
AttributeError: feature_names

Related

'Config' object has no attribute 'jax_experimental_name_stack'

Screenshot of the error message
I'm trying to run Alphafold on Google runtime, and I'm getting this error:
UnfilteredStackTrace Traceback (most recent call last)
<ipython-input-5-ca4ee2dc266d> in <module>
44 processed_feature_dict = model_runner.process_features(np_example, random_seed=0)
---> 45 prediction = model_runner.predict(processed_feature_dict, random_seed=random.randrange(sys.maxsize))
46
UnfilteredStackTrace: AttributeError: 'Config' object has no attribute 'jax_experimental_name_stack'
The stack trace below excludes JAX-internal frames.
The preceding is the original exception that occurred, unmodified.
--------------------
The above exception was the direct cause of the following exception:
AttributeError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/haiku/_src/module.py in wrapped(self, *args, **kwargs)
406 f = functools.partial(unbound_method, self)
407 f = functools.partial(run_interceptors, f, method_name, self)
--> 408 if jax.config.jax_experimental_name_stack and module_name:
409 local_module_name = module_name.split("/")[-1]
410 f = jax.named_call(f, name=local_module_name)
AttributeError: 'Config' object has no attribute 'jax_experimental_name_stack'
How do I resolve this error? The link for Alphafold is below.
https://colab.research.google.com/github/deepmind/alphafold/blob/main/notebooks/AlphaFold.ipynb

The above exception was the direct cause of the following exception - ValueError: 10 is not in range

I am trying to run the code below to load my dataset into a PyTorch Dataset class with a custom collate function and map over it, but I am getting an error. The dataset consists of 123061 samples, so in the code below I have used only the first 10. If I use the whole dataset, I get the error ValueError: 123061 is not in range instead. Where exactly am I going wrong?
class Dataclass(Dataset):
def __init__(self,dataset):
self.dataset = dataset
def __len__(self):
return len(self.dataset)
def __getitem__(self, idx):
solute = self.dataset.loc[idx]['Drug1_SMILES']
mol = Chem.MolFromSmiles(solute)
mol = Chem.AddHs(mol)
solute = Chem.MolToSmiles(mol)
solute_graph = get_graph_from_smile(solute)
solvent = self.dataset.loc[idx]['Drug2_SMILES']
mol = Chem.MolFromSmiles(solvent)
mol = Chem.AddHs(mol)
solvent = Chem.MolToSmiles(mol)
solvent_graph = get_graph_from_smile(solvent)
delta_g = self.dataset.loc[idx]['label']
return [solute_graph, solvent_graph]
tg = Dataclass(train_df[:10])
solute_graphs, solvent_graphs, labels = map(list, zip(*tg))
Error
ValueError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
384 try:
--> 385 return self._range.index(new_key)
386 except ValueError as err:
ValueError: 10 is not in range
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
6 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
385 return self._range.index(new_key)
386 except ValueError as err:
--> 387 raise KeyError(key) from err
388 raise KeyError(key)
389 return super().get_loc(key, method=method, tolerance=tolerance)
KeyError: 10

embedding_vectors = get_weight_matrix(w2v_model)

---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-184-6ab4a6968e06> in <module>()
----> 1 embedding_vectors = get_weight_matrix(w2v_model)
3 frames
/usr/local/lib/python3.7/dist-packages/gensim/models/keyedvectors.py in word_vec(self, word, use_norm)
450 return result
451 else:
--> 452 raise KeyError("word '%s' not in vocabulary" % word)
453
454 def get_vector(self, word):

KeyError Traceback (most recent call last)

from geopy.geocoders import Here
exif = get_exif('earth_postcard_1599147372.jpg')
geotags = get_geotagging(exif)
coords = get_coordinates(geotags)
geocoder = Here(apikey=os.environ['API_KEY'])
print(geocoder.reverse("%s,%s" % coords))
ERROR
KeyError Traceback (most recent call last)
<ipython-input-13-baa26ae24e59> in <module>
4 geotags = get_geotagging(exif)
5 coords = get_coordinates(geotags)
----> 6 geocoder = Here(apikey=os.environ['API_KEY'])
7 print(geocoder.reverse("%s,%s" % coords))
~\anaconda3\lib\os.py in __getitem__(self, key)
677 except KeyError:
678 # raise KeyError with the original key value
--> 679 raise KeyError(key) from None
680 return self.decodevalue(value)
681
KeyError: 'API_KEY'
Here I am trying to get a location from its latitude and longitude using the geopy library, but I'm getting a KeyError.
You should check whether you have set the environment variable. Open a Python interactive prompt and enter:
import os
os.environ.keys()
This prints all the environment variables that are currently set. "API_KEY" is most likely not among them. Once you set this variable in your system environment, the error will go away.

Performing Dickey-Fuller test in Python

I'm trying to perform the Dickey-Fuller test in part of the code and this error is displayed:
TypeError: 'str' object cannot be interpreted as an integer
When I try the same test in another part of the code, it works fine.
The only difference I noticed is in the arrangement of the data, as you can see in this screenshot (https://i.stack.imgur.com/czUB1.jpg),
where the "table" marked in red is the one that does not work.
Code:
from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(indexedDataset_logScale)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
plt.subplot(411)
plt.plot(indexedDataset_logScale, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal,label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()
decomposedLogData = residual
decomposedLogData.dropna(inplace=True)
test_stationarity(decomposedLogData)
Complete traceback:
Results of Dickey-Fuller Test
TypeError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4410 try:
-> 4411 return libindex.get_value_at(s, key)
4412 except IndexError:
pandas\_libs\index.pyx in pandas._libs.index.get_value_at()
pandas\_libs\index.pyx in pandas._libs.index.get_value_at()
pandas\_libs\util.pxd in pandas._libs.util.get_value_at()
pandas\_libs\util.pxd in pandas._libs.util.validate_indexer()
TypeError: 'str' object cannot be interpreted as an integer
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_value(self, series, key)
650 try:
--> 651 value = Index.get_value(self, series, key)
652 except KeyError:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4418 else:
-> 4419 raise e1
4420 except Exception:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_value(self, series, key)
4404 try:
-> 4405 return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
4406 except KeyError as e1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.DatetimeEngine._date_check_type()
KeyError: '#Passengers'
During handling of the above exception, another exception occurred:
ParserError Traceback (most recent call last)
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
pandas\_libs\tslibs\parsing.pyx in pandas._libs.tslibs.parsing.parse_datetime_string()
C:\ProgramData\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(timestr, parserinfo, **kwargs)
1373 else:
-> 1374 return DEFAULTPARSER.parse(timestr, **kwargs)
1375
C:\ProgramData\Anaconda3\lib\site-packages\dateutil\parser\_parser.py in parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
648 if res is None:
--> 649 raise ParserError("Unknown string format: %s", timestr)
650
ParserError: Unknown string format: #Passengers
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_value(self, series, key)
659 try:
--> 660 return self.get_value_maybe_box(series, key)
661 except (TypeError, ValueError, KeyError):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_value_maybe_box(self, series, key)
674 elif not isinstance(key, Timestamp):
--> 675 key = Timestamp(key)
676 values = self._engine.get_value(com.values_from_object(series), key, tz=self.tz)
pandas\_libs\tslibs\timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
pandas\_libs\tslibs\conversion.pyx in pandas._libs.tslibs.conversion.convert_str_to_tsobject()
ValueError: could not convert string to Timestamp
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-18-f3dd1636e820> in <module>
23 decomposedLogData = residual
24 decomposedLogData.dropna(inplace=True)
---> 25 test_stationarity(decomposedLogData)
26
<ipython-input-12-099228b0a850> in test_stationarity(timeseries)
16 #Perform Dickey-Fuller test:
17 print('Results of Dickey-Fuller Test:')
---> 18 dftest = adfuller(timeseries['#Passengers'], autolag='AIC')
19 dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
20 for key,value in dftest[4].items():
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
869 key = com.apply_if_callable(key, self)
870 try:
--> 871 result = self.index.get_value(self, key)
872
873 if not is_scalar(result):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_value(self, series, key)
660 return self.get_value_maybe_box(series, key)
661 except (TypeError, ValueError, KeyError):
--> 662 raise KeyError(key)
663 else:
664 return com.maybe_box(self, value, series, key)
KeyError: '#Passengers'
Would you please help me with this question?
Thank you.
Marcelo
In the function "test_stationarity", the call to adfuller expects the argument timeseries to have a column named '#Passengers'.
But the output of seasonal_decompose comes without that column name.
My solution was to convert decomposedLogData to a DataFrame and give it the '#Passengers' column name back. It worked for me.
decomposedLogData = residual
decomposedLogData.dropna(inplace = True)
decomposedLogData = pd.DataFrame(decomposedLogData)
decomposedLogData.columns = ["#Passengers"]
test_stationary(decomposedLogData)
If you've already found a more elegant solution, please share :)

Categories