This question already has answers here:
Renaming column names in Pandas
(35 answers)
Closed 1 year ago.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler,normalize
from sklearn.metrics import silhouette_score
newdf = pd.read_csv("D:\DATASETS/CC_GENERAL.csv")
x = newdf.drop('CUST_ID',axis = 1)
x.fillna(method = 'ffill',inplace = True)
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)
x_normalized = normalize(x_scaled)
x_normalized = pd.DataFrame(x_normalized)
pca = PCA(n_components= 2)
x_principal = pca.fit_transform(x_normalized)
x_principal = pd.DataFrame(x_normalized)
x_principal = ['P1','P2']
ac2 = AgglomerativeClustering(n_clusters = 2)
plt.figure(figsize = (6,6))
c= ac2.fit_predict((x_principal),cmap = 'rainbow')
and this is my error:
TypeError Traceback (most recent call last)
<ipython-input-61-56f631c43c3e> in <module>
3 #visualizing the cluster
4 plt.figure(figsize = (6,6))
----> 5 plt.scatter(x_principal['P1'],x_principal['P2'])
6 c= ac2.fit_predict((x_principal),cmap = 'rainbow')
TypeError: list indices must be integers or slices, not str
If you are trying to rename the columns of x_principal, which seems most likely, you should be using x_principal.columns = ['P1', 'P2']. Right now you are assigning the list itself to x_principal, which overwrites the data.
x_principal is a list containing the two strings 'P1' and 'P2', so x_principal['P1'] is wrong: list elements can only be indexed by integer position or slice, not by the element's own value.
I am practicing with the MetPy Monday interpolate_to_grid example for METAR data, and I successfully got the MSLP grid to work.
Moving on to potential temperature, the result is all NaN when it "works". When it doesn't work, I get a set of errors that don't appear to help...
import numpy as np
import as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
from siphon.catalog import TDSCatalog
from import parse_metar_file
from metpy.interpolate import interpolate_to_grid, remove_nan_observations
from metpy.plots import add_metpy_logo, current_weather, sky_cover, StationPlot
from metpy.calc import wind_components, wet_bulb_temperature, altimeter_to_station_pressure,potential_temperature,gradient
from metpy.units import units
from datetime import datetime,timedelta
import pandas as pd
mapcrs = ccrs.LambertConformal(central_longitude=-100.,central_latitude=35.,standard_parallels=(30.,60.))
datacrs = ccrs.PlateCarree()
cat = TDSCatalog('')
ds = cat.datasets[-4]
dattim =[6:14]+' '[15:19]
df = parse_metar_file(
#pandas dataframe
extent = [-120,-72,24,50]
df = df.dropna(subset=['latitude','longitude','elevation','altimeter','air_temperature','eastward_wind','northward_wind','air_pressure_at_sea_level','dew_point_temperature'])
lon = df['longitude'].values
lat = df['latitude'].values
stn_ids = df['station_id'].values
elev = df['elevation'].values
altimeter = df['altimeter'].values
t2 = df['air_temperature'].values
mslp = df['air_pressure_at_sea_level'].values
#projected coords
xp, yp, _ = mapcrs.transform_points(datacrs,lon,lat).T # x,y returned
#mslp WORKS
x_masked, y_masked, mslp = remove_nan_observations(xp,yp,mslp)
#altgridx,altgridy,alt = interpolate_to_grid(x_masked,y_masked,alt, interp_type='cressman')
altgridx,altgridy,mslp = interpolate_to_grid(x_masked,y_masked,mslp, interp_type='barnes',gamma=.5,kappa_star=10, hres=25000)
#Potential Temperature doesnt work
pres = altimeter_to_station_pressure(altimeter * units('mbar'), elev * units('m'))*33.8639
# theta
x_masked, y_masked, temp = remove_nan_observations(xp,yp,t2*units('degC'))
x_masked, y_masked, pres = remove_nan_observations(xp,yp,pres)
potemp = potential_temperature(pres, temp)
grdx = 75000.
thgridx,thgridy,theta = interpolate_to_grid(x_masked,y_masked, potemp, interp_type='barnes',kappa_star=6, gamma=0.5,hres=grdx)
Here is what is returned from the last section:
[949.361081708803 993.4468013877739 987.2845093729651 ... 1029.0930108008558 1016.002484792407 930.3708063382303] millibar
[236.32885315 237.21299941 239.04372591 ... 368.37047837 369.20079652
DimensionalityError Traceback (most recent call last)
~/miniconda3/lib/python3.7/site-packages/pint/ in __float__(self)
896 return float(self._convert_magnitude_not_inplace(UnitsContainer()))
--> 897 raise DimensionalityError(self._units, "dimensionless")
DimensionalityError: Cannot convert from 'kelvin' to 'dimensionless'
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
/var/folders/5n/sg5k98bx6gg4flb4fskykh4m0000gn/T/ipykernel_41626/ in <module>
12 grdx = 75000.
---> 13 thgridx,thgridy,theta = interpolate_to_grid(x_masked,y_masked, potemp, interp_type='barnes',kappa_star=6, gamma=0.5,hres=grdx)
14 print(np.shape(thgridx))
15 print(np.unique(theta))
~/miniconda3/lib/python3.7/site-packages/metpy/ in wrapper(*args, **kwargs)
19 kwargs = {name: (v.values if isinstance(v, pd.Series) else v)
20 for name, v in kwargs.items()}
---> 21 return func(*args, **kwargs)
22 return wrapper
~/miniconda3/lib/python3.7/site-packages/metpy/interpolate/ in interpolate_to_grid(x, y, z, interp_type, hres, minimum_neighbors, gamma, kappa_star, search_radius, rbf_func, rbf_smooth, boundary_coords)
301 minimum_neighbors=minimum_neighbors, gamma=gamma,
302 kappa_star=kappa_star, search_radius=search_radius,
--> 303 rbf_func=rbf_func, rbf_smooth=rbf_smooth)
305 return grid_x, grid_y, img.reshape(grid_x.shape)
~/miniconda3/lib/python3.7/site-packages/metpy/interpolate/ in interpolate_to_points(points, values, xi, interp_type, minimum_neighbors, gamma, kappa_star, search_radius, rbf_func, rbf_smooth)
365 return inverse_distance_to_points(points, values, xi, search_radius, gamma, kappa,
366 min_neighbors=minimum_neighbors,
--> 367 kind=interp_type)
369 # If this is radial basis function, make the interpolator and apply it
~/miniconda3/lib/python3.7/site-packages/metpy/interpolate/ in inverse_distance_to_points(points, values, xi, r, gamma, kappa, min_neighbors, kind)
268 img[idx] = cressman_point(dists, values_subset, r)
269 elif kind == 'barnes':
--> 270 img[idx] = barnes_point(dists, values_subset, kappa, gamma)
272 else:
ValueError: setting an array element with a sequence.
I struggled with units, but I think the units are correct now. What could be causing this?
I tried Cressman, I tried a larger Barnes grid, and I tried making sure search_radius was large. When it ran without errors, the result was still all NaN.
The problem is caused by interpolate_to_grid choking on units when using Cressman or Barnes--which we definitely need to fix. For now the solution is to either use a different interpolation method (like interp_type='linear', the default), or to strip units before calling:
thgridx, thgridy, theta = interpolate_to_grid(x_masked, y_masked, potemp.magnitude,
interp_type='barnes', kappa_star=6, gamma=0.5, hres=grdx)
theta = units.Quantity(theta, 'K')
As far as your problems with NaNs are concerned, you may want to look at the search_radius parameter, which controls the maximum distance from a target point within which observations are considered. In some data-sparse areas, this could cause you to have some drop-outs. By default, it uses a guess of 5 times the average distance from one observation point to its nearest neighbor.
I'm trying to apply some machine learning based regression on data from a CSV file. My columns are:
Index(['date', 'customer_id', 'product_category', 'payment_method',
'value [USD]', 'time_on_site', 'clicks_in_site', 'USD/[Minutes]',
When I run:
from pycaret.regression import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
exp_reg = setup(data = df, target='value [USD]', session_id=123,
high_cardinality_features = ['product_category'],
normalize = True,
ignore_features = ['customer_id', 'date', 'time_on_site']
I get the following error message:
KeyError Traceback (most recent call last)
<ipython-input-43-20eab85de0cc> in <module>()
2 high_cardinality_features = ['product_category'],
3 normalize = True,
----> 4 ignore_features = ['customer_id', 'date', 'time_on_site']
5 )
5 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/ in drop(self, labels, errors)
5285 if mask.any():
5286 if errors != "ignore":
-> 5287 raise KeyError(f"{labels[mask]} not found in axis")
5288 indexer = indexer[~mask]
5289 return self.delete(indexer)
KeyError: "['value [USD]'] not found in axis"
I found the solution. The column name 'value [USD]' was the problem. After renaming it, the code works as intended. It probably has something to do with the brackets inside the column name, which may be interpreted as a dictionary or list, but I'm not sure.
I'm expected to read a column name ("Horas Estudo") and a number x = 5.95, then calculate y = ax + b.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
df = pd.read_csv("")
col = input()
x = round(float(input()),2)
(a,b) = np.polyfit(df[col],df["Nota Final"], deg=2)
ValueError Traceback (most recent call last)
<ipython-input-81-7e077f926863> in <module>()
7 x = round(float(input()),2)
----> 9 (a,b) = np.polyfit(df[col],df["Nota Final"], deg=2)
10 print(a,b)
ValueError: too many values to unpack (expected 2)
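np.polyfit returns the polynomial coefficients as an ndarray (not a tuple) of length deg + 1. With deg=2 that is three coefficients, so unpacking them into the two names (a, b) fails. Put a single variable on the left and print it instead.
Also, if you are trying to find a linear fit (y = ax + b), pass deg=1 in the function call; the result then contains exactly two coefficients, a and b.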
I got an error ValueError: Found infinity in column x.
Traceback says
---> 20
242 df[‘x’] = pd.to_numeric(df[‘x’])
243 if np.isinf(df[‘x’].values).any():
--> 244 raise ValueError('Found infinity in column y.')
245 df[‘d’] = pd.to_datetime(df[‘d’])
246 if df[‘d’].isnull().any():
I really cannot understand what this error message means, because I do not have any infinite values in df. How should I fix this? What is wrong in my code?
My code is:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fbprophet import Prophet
for i in range(10):
df = pd.read_csv('data'+ i + '.csv', encoding='shift-jis')
model = Prophet()
future_data = model.make_future_dataframe(periods=12, freq = 'm')
forecast_data = model.predict(future_data)
So, you need to remove infinity values from your DataFrame. It can be done like this:
DataFrame.replace([np.inf, -np.inf], np.nan)
Once you have replaced the infinity values with NaN, you can remove them from the DataFrame via dropna:
DataFrame.dropna(subset=["YourColumn"], how="all")