I have a DataFrame called combine which needs to be written to an Excel file.
writer = pd.ExcelWriter('YTD.xlsx', engine ='xlsxwriter', options={'strings_to_numbers':True})
workbook = writer.book
combine.to_excel(writer, sheet_name='Sheet1', startrow=1 , startcol=0, header=True, index=False, encoding='utf8')
worksheet1 = writer.sheets['Sheet1']
num_fmt = workbook.add_format({'num_format': '#,##0.00_ ;[Red]-#,##0.00 '})
worksheet1.set_column('B:AJ', 15, num_fmt)
writer.save()
the result is interesting:
Because I used options={'strings_to_numbers':True}, some values were already converted to numbers, but some were not. I guess the ',' thousands separator may be the problem, so I tried to get rid of the ',' first:
for col in combine.columns[1:]: # UPDATE ONLY NUMERIC COLS
combine[col].replace(',','')
combine[col] = combine[col].astype(float) # CONVERT TO FLOAT
but failed:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-16-ce75c2cc211a> in <module>
1 for col in combine.columns[1:]: # UPDATE ONLY NUMERIC COLS
2 #combine[col].replace(',','')
----> 3 combine[col] = combine[col].astype(float) # CONVERT TO FLOAT
~\Anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
5880 # else, only a single dtype is given
5881 new_data = self._data.astype(
-> 5882 dtype=dtype, copy=copy, errors=errors, **kwargs
5883 )
5884 return self._constructor(new_data).__finalize__(self)
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, **kwargs)
579
580 def astype(self, dtype, **kwargs):
--> 581 return self.apply("astype", dtype=dtype, **kwargs)
582
583 def convert(self, **kwargs):
~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
557
558 def astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
--> 559 return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs)
560
561 def _astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
641 # _astype_nansafe works fine with 1-d only
642 vals1d = values.ravel()
--> 643 values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
644
645 # TODO(extension)
~\Anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
727 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
728 # Explicit copy, or required since NumPy can't view from / to object.
--> 729 return arr.astype(dtype, copy=True)
730
731 return arr.view(dtype)
ValueError: could not convert string to float: '-181,849.78'
I really don't know what to try now. Others may have the same problem, so I am posting it here.
This should work:-
for col in combine.columns[1:]:
combine[col] = combine[col].str.replace(',', '').astype(float)
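You forgot to put .str after combine[col]: without it, Series.replace only replaces cells whose entire value is ',', and the result must also be assigned back (or chained into astype, as above) to take effect.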
Related
In one column of my df I have values together with units of measurement, so I extracted the values into another column; however, the dtype is still object.
My table:
cost
uom_value
23226.8835 kg
23226.8835
244.09 kg
244.09
24226.5 kg
24226.5
255.01 kg
255.01
I wanted to convert them to float to use them in my calculations; however, I am getting the error below, even when doing a simple calculation such as df['uom_value'].astype(float).sum()
any help is appreciated
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [22], in <cell line: 1>()
----> 1 df['uom_value'].astype(float).sum()
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\generic.py:5912, in NDFrame.astype(self, dtype, copy, errors)
5905 results = [
5906 self.iloc[:, i].astype(dtype, copy=copy)
5907 for i in range(len(self.columns))
5908 ]
5910 else:
5911 # else, only a single dtype is given
-> 5912 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
5913 return self._constructor(new_data).__finalize__(self, method="astype")
5915 # GH 33113: handle empty frame or series
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\internals\managers.py:419, in BaseBlockManager.astype(self, dtype, copy, errors)
418 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
--> 419 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\internals\managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
302 applied = b.apply(f, **kwargs)
303 else:
--> 304 applied = getattr(b, f)(**kwargs)
305 except (TypeError, NotImplementedError):
306 if not ignore_failures:
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\internals\blocks.py:580, in Block.astype(self, dtype, copy, errors)
562 """
563 Coerce to the new dtype.
564
(...)
576 Block
577 """
578 values = self.values
--> 580 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
582 new_values = maybe_coerce_values(new_values)
583 newb = self.make_block(new_values)
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\dtypes\cast.py:1292, in astype_array_safe(values, dtype, copy, errors)
1289 dtype = dtype.numpy_dtype
1291 try:
-> 1292 new_values = astype_array(values, dtype, copy=copy)
1293 except (ValueError, TypeError):
1294 # e.g. astype_nansafe can fail on object-dtype of strings
1295 # trying to convert to float
1296 if errors == "ignore":
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\dtypes\cast.py:1237, in astype_array(values, dtype, copy)
1234 values = values.astype(dtype, copy=copy)
1236 else:
-> 1237 values = astype_nansafe(values, dtype, copy=copy)
1239 # in pandas we don't store numpy str dtypes, so convert to object
1240 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
File ~\Anaconda3\envs\general\lib\site-packages\pandas\core\dtypes\cast.py:1181, in astype_nansafe(arr, dtype, copy, skipna)
1177 raise ValueError(msg)
1179 if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
1180 # Explicit copy, or required since NumPy can't view from / to object.
-> 1181 return arr.astype(dtype, copy=True)
1183 return arr.astype(dtype, copy=copy)
ValueError: could not convert string to float: 'you can see diemension in the order, it was 3'
Use the pandas.to_numeric method. Note that it is better to set its errors argument (for example errors='coerce', which turns unparseable values into NaN) based on your usage. For more detail, read about it in the docs.
I want to drop rows in the pandas dataframe meth_clin_sub_nt_kipan if the columns in meth_clin_sub_nt_kipan.iloc[:,7:-1] are NA.
import pandas as pd
import numpy as np
# Drop rows if cg* columns has NA
meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.iloc[:,7:-1],inplace=True)
Traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_3010/559698406.py in <module>
1 # Drop rows if cg* columns has NA
----> 2 meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.iloc[:,7:-1],inplace=True)
/opt/conda/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in dropna(self, axis, how, thresh, subset, inplace)
5948 if subset is not None:
5949 ax = self._get_axis(agg_axis)
-> 5950 indices = ax.get_indexer_for(subset)
5951 check = indices == -1
5952 if check.any():
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer_for(self, target, **kwargs)
5273 """
5274 if self._index_as_unique:
-> 5275 return self.get_indexer(target, **kwargs)
5276 indexer, _ = self.get_indexer_non_unique(target)
5277 return indexer
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer(self, target, method, limit, tolerance)
3435 # returned ndarray is np.intp
3436 method = missing.clean_reindex_fill_method(method)
-> 3437 target = self._maybe_cast_listlike_indexer(target)
3438
3439 self._check_indexing_method(method, limit, tolerance)
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in _maybe_cast_listlike_indexer(self, target)
5706 Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
5707 """
-> 5708 return ensure_index(target)
5709
5710 @final
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in ensure_index(index_like, copy)
6334 else:
6335
-> 6336 return Index(index_like, copy=copy)
6337
6338
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in __new__(cls, data, dtype, copy, name, tupleize_cols, **kwargs)
474 raise cls._scalar_data_error(data)
475 elif hasattr(data, "__array__"):
--> 476 return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
477 else:
478
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in __new__(cls, data, dtype, copy, name, tupleize_cols, **kwargs)
467
468 klass = cls._dtype_to_subclass(arr.dtype)
--> 469 arr = klass._ensure_array(arr, dtype, copy)
470 disallow_kwargs(kwargs)
471 return klass._simple_new(arr, name)
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/numeric.py in _ensure_array(cls, data, dtype, copy)
169 if subarr.ndim > 1:
170 # GH#13601, GH#20285, GH#27125
--> 171 raise ValueError("Index data must be 1-dimensional")
172
173 subarr = np.asarray(subarr)
ValueError: Index data must be 1-dimensional
Data:
meth_clin_sub_nt_kipan.iloc[0,0:19].to_dict()
{'admin.disease_code': 'kirp',
'days_to_death': nan,
'vital_status': 'alive',
'age_at_initial_pathologic_diagnosis': 53.0,
'gender': 'male',
'karnofsky_performance_score': nan,
'survival': 'lts',
'cg00000029': 0.461440642939772,
'cg00000165': 0.143910373119058,
'cg00000236': 0.847164847154162,
'cg00000289': 0.737361955793681,
'cg00000292': 0.716794733144112,
'cg00000321': 0.351877113536983,
'cg00000363': 0.248986769373366,
'cg00000622': 0.0121360989202765,
'cg00000658': 0.876303885229884,
'cg00000721': 0.944311384947134,
'cg00000734': 0.0490407302658151,
'cg00000769': 0.0200484962577958}
Try this:
meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1])
By the way, if you are assigning the DataFrame with the NaNs dropped to a new DataFrame, you do not need inplace=True (with inplace=True, dropna returns None). It is useful if you want to modify your current DataFrame without assigning it back to itself, so this:
meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1], inplace=True)
is equivalent to this:
meth_clin_sub_nt_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1])
I'm preparing an unbalanced dataset and would like to use SMOTE from the Python package imbalanced-learn (imblearn). When I try to run the code, it raises an error: TypeError: cannot safely cast non-equivalent float64 to int64
My dataset (first 5 rows):
Dataset
The error traceback:
TypeError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\arrays\integer.py in safe_cast(values, dtype, copy)
147 try:
--> 148 return values.astype(dtype, casting="safe", copy=copy)
149 except TypeError:
TypeError: Cannot cast array from dtype('float64') to dtype('int64') according to the rule 'safe'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-456-c6d4d3314a06> in <module>
5
6 # transform the dataset
----> 7 X_smote, y_smote = pipeline.fit_resample(X, y)
8
9
~\anaconda3\lib\site-packages\imblearn\pipeline.py in fit_resample(self, X, y, **fit_params)
351 """
352 last_step = self._final_estimator
--> 353 Xt, yt, fit_params = self._fit(X, y, **fit_params)
354 with _print_elapsed_time('Pipeline',
355 self._log_message(len(self.steps) - 1)):
~\anaconda3\lib\site-packages\imblearn\pipeline.py in _fit(self, X, y, **fit_params)
234 )
235 elif hasattr(cloned_transformer, "fit_resample"):
--> 236 X, y, fitted_transformer = fit_resample_one_cached(
237 cloned_transformer, X, y,
238 message_clsname='Pipeline',
~\anaconda3\lib\site-packages\joblib\memory.py in __call__(self, *args, **kwargs)
350
351 def __call__(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
353
354 def call_and_shelve(self, *args, **kwargs):
~\anaconda3\lib\site-packages\imblearn\pipeline.py in _fit_resample_one(sampler, X, y, message_clsname, message, **fit_params)
401 **fit_params):
402 with _print_elapsed_time(message_clsname, message):
--> 403 X_res, y_res = sampler.fit_resample(X, y, **fit_params)
404
405 return X_res, y_res, sampler
~\anaconda3\lib\site-packages\imblearn\base.py in fit_resample(self, X, y)
86 if binarize_y else output[1])
87
---> 88 X_, y_ = arrays_transformer.transform(output[0], y_)
89 return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
90
~\anaconda3\lib\site-packages\imblearn\utils\_validation.py in transform(self, X, y)
38
39 def transform(self, X, y):
---> 40 X = self._transfrom_one(X, self.x_props)
41 y = self._transfrom_one(y, self.y_props)
42 return X, y
~\anaconda3\lib\site-packages\imblearn\utils\_validation.py in _transfrom_one(self, array, props)
57 import pandas as pd
58 ret = pd.DataFrame(array, columns=props["columns"])
---> 59 ret = ret.astype(props["dtypes"])
60 elif type_ == "series":
61 import pandas as pd
~\anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5681 if col_name in dtype:
5682 results.append(
-> 5683 col.astype(dtype=dtype[col_name], copy=copy, errors=errors)
5684 )
5685 else:
~\anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5696 else:
5697 # else, only a single dtype is given
-> 5698 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
5699 return self._constructor(new_data).__finalize__(self)
5700
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
580
581 def astype(self, dtype, copy: bool = False, errors: str = "raise"):
--> 582 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
583
584 def convert(self, **kwargs):
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, filter, **kwargs)
440 applied = b.apply(f, **kwargs)
441 else:
--> 442 applied = getattr(b, f)(**kwargs)
443 result_blocks = _extend_blocks(applied, result_blocks)
444
~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
623 vals1d = values.ravel()
624 try:
--> 625 values = astype_nansafe(vals1d, dtype, copy=True)
626 except (ValueError, TypeError):
627 # e.g. astype_nansafe can fail on object-dtype of strings
~\anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
819 # dispatch on extension dtype if needed
820 if is_extension_array_dtype(dtype):
--> 821 return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)
822
823 if not isinstance(dtype, np.dtype):
~\anaconda3\lib\site-packages\pandas\core\arrays\integer.py in _from_sequence(cls, scalars, dtype, copy)
352 @classmethod
353 def _from_sequence(cls, scalars, dtype=None, copy=False):
--> 354 return integer_array(scalars, dtype=dtype, copy=copy)
355
356 @classmethod
~\anaconda3\lib\site-packages\pandas\core\arrays\integer.py in integer_array(values, dtype, copy)
133 TypeError if incompatible types
134 """
--> 135 values, mask = coerce_to_array(values, dtype=dtype, copy=copy)
136 return IntegerArray(values, mask)
137
~\anaconda3\lib\site-packages\pandas\core\arrays\integer.py in coerce_to_array(values, dtype, mask, copy)
249 values = safe_cast(values, dtype, copy=False)
250 else:
--> 251 values = safe_cast(values, dtype, copy=False)
252
253 return values, mask
~\anaconda3\lib\site-packages\pandas\core\arrays\integer.py in safe_cast(values, dtype, copy)
153 return casted
154
--> 155 raise TypeError(
156 f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
157 )
TypeError: cannot safely cast non-equivalent float64 to int64
X = new_dataset_enc.drop(['stroke'], axis=1)
y = new_dataset_enc['stroke']
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from collections import Counter
num_before = dict(Counter(y))
over = SMOTE(sampling_strategy=0.8)
under = RandomUnderSampler(sampling_strategy=0.8)
steps = [('o', over), ('u', under)]
pipeline = Pipeline(steps=steps)
# transform the dataset
X_smote, y_smote = pipeline.fit_resample(X, y)
# the numbers after SMOTE
num_after =dict(Counter(y_smote))
I have already tried several ways to fix it, but the issue keeps showing up. There are no "NaN" values in columns or missing ones. I've changed the type of each column to int64 and to object to test if that could fix the error. Nothing works.
I would really appreciate any ideas that could lead to a solution.
I faced the same issue. My solution is similar to Tim's suggestion, which is adding:
y = y.astype(np.float64)
but that was not enough for me, since it returned the same error.
It works when I also convert X to float64, like:
X = X.astype(np.float64)
right after your first two lines of code.
births['day']=births['day'].astype(int)
I was working through this in a Jupyter notebook, following the code as given in the book, but maybe the code has changed, or maybe I have the wrong idea of the expected output and it should not be done like this.
I don't know; please help me solve this problem.
I am using the dataset births.csv.
ValueError Traceback (most recent call last)
<ipython-input-12-e7a41e4b25cc> in <module>
----> 1 births['day']=births['day'].astype(int)
~\anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5696 else:
5697 # else, only a single dtype is given
-> 5698 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
5699 return self._constructor(new_data).__finalize__(self)
5700
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
580
581 def astype(self, dtype, copy: bool = False, errors: str = "raise"):
--> 582 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
583
584 def convert(self, **kwargs):
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, filter, **kwargs)
440 applied = b.apply(f, **kwargs)
441 else:
--> 442 applied = getattr(b, f)(**kwargs)
443 result_blocks = _extend_blocks(applied, result_blocks)
444
~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
623 vals1d = values.ravel()
624 try:
--> 625 values = astype_nansafe(vals1d, dtype, copy=True)
626 except (ValueError, TypeError):
627 # e.g. astype_nansafe can fail on object-dtype of strings
~\anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
866
867 if not np.isfinite(arr).all():
--> 868 raise ValueError("Cannot convert non-finite values (NA or inf) to integer")
869
870 elif is_object_dtype(arr):
ValueError: Cannot convert non-finite values (NA or inf) to integer
Your DataFrame seems to contain nan values in that column.
So, you might fill those values or remove them before conversion.
Let's fill NaN values with 0 here:
births['day'] = births['day'].fillna(0).astype(int)
For an enlightening reading on Managing missing Values in Pandas, refer to this link: https://www.geeksforgeeks.org/working-with-missing-data-in-pandas/
I'm working with a data set on suicides, part of which includes a gdp_for_year column. However, the column is of object type and understandably needs to be an int. This is the error I receive:
ValueError Traceback (most recent call last)
<ipython-input-10-ec740fbd9849> in <module>
2 suicides.info()
3
----> 4 suicides['gdp_for_year'] = suicides['gdp_for_year'].astype('int')
~\anaconda3\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5696 else:
5697 # else, only a single dtype is given
-> 5698 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
5699 return self._constructor(new_data).__finalize__(self)
5700
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
580
581 def astype(self, dtype, copy: bool = False, errors: str = "raise"):
--> 582 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
583
584 def convert(self, **kwargs):
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, filter, **kwargs)
440 applied = b.apply(f, **kwargs)
441 else:
--> 442 applied = getattr(b, f)(**kwargs)
443 result_blocks = _extend_blocks(applied, result_blocks)
444
~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
623 vals1d = values.ravel()
624 try:
--> 625 values = astype_nansafe(vals1d, dtype, copy=True)
626 except (ValueError, TypeError):
627 # e.g. astype_nansafe can fail on object-dtype of strings
~\anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
872 # work around NumPy brokenness, #1987
873 if np.issubdtype(dtype.type, np.integer):
--> 874 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
875
876 # if we have a datetime/timedelta array of objects
pandas\_libs\lib.pyx in pandas._libs.lib.astype_intsafe()
**ValueError: invalid literal for int() with base 10: '2,156,624,900'**
Dataframe info() and head()
Does anyone have suggestions on what I can do?
Your string
'2,156,624,900'
contains commas. You can't automatically convert this string to an integer. You would first have to remove the commas. You can do so by:
int('2,156,624,900'.replace(',', ''))
So, in your case, you would either want to follow some of the more elaborate locale settings linked in the comment under your post or apply this replace function on the whole column first, and then convert it to int.