Can someone help me with how to create a scatterplot. I have written the following code, however, it is not the scatter plot link that I expected as all data only concentrate 3 values of x-variable
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from scipy.stats import skew
from warnings import filterwarnings
filterwarnings('ignore')
df_transactions = pd.read_csv('transactions.csv')
daily_revenue= df_transactions.groupby("days_after_open").sum()['revenue']
df_transactions["daily_revenue"] = daily_revenue
x = df_transactions["days_after_open"]
y = df_transactions["daily_revenue"]
plt.scatter(x,y,alpha=0.2)
plt.xlabel("Days After Open (days)")
plt.ylabel("Daily Reveue ($)")
plt.savefig("plot")
dataframe image
Please define the 'daily_revenue' following before moving to the scatter plot.
y = df_transactions["daily_revenue"]
Related
I'm having trouble creating this plot in spyder:
import seaborn as sns
import pandas as pd
from pandas.api.types import CategoricalDtype
diamonds= sns.load_dataset("diamonds")
df=diamonds.copy()
cut_Kategoriler=["Fair","Good","Very Good","Premium","Ideal"]
df.cut=df.cut.astype(CategoricalDtype(categories = cut_Kategoriler,ordered=True))
print(df.head())
sns.catplot(x="cut",y="price",data=df)
sns.barplot(x="cut",y="price",hue="color",data=df)
I want create two plots. But these plots overflap. How can i separate the graphics in the last two lines?
You need to import matplotlib.pyplot as plt and then add plt.show() after each of the two plots.
The modified code is added below:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt # Import Matplotlib
from pandas.api.types import CategoricalDtype
diamonds = sns.load_dataset("diamonds")
df=diamonds.copy()
cut_Kategoriler=["Fair","Good","Very Good","Premium","Ideal"]
df.cut=df.cut.astype(CategoricalDtype(categories = cut_Kategoriler,ordered=True))
print(df.head())
sns.catplot(x="cut",y="price",data=df)
plt.show() # Display the first plot
sns.barplot(x="cut",y="price",hue="color",data=df)
plt.show() # Display the second plot
I have a netcdf file ('test.nc'). The variables of the netcdf file are the following:
variables(dimensions): float64 lon(lon), float64 lat(lat), int32 crs(), int16 Band1(lat,lon)
I am interested in the ´Band1´ variable.
Using cartopy, I could plot the data using the following code:
import numpy as np
import pandas as pd
import gzip
from netCDF4 import Dataset,num2date
import time
import matplotlib.pyplot as plt
import os
import matplotlib as mplt
#mplt.use('Agg')
import cartopy.crs as ccrs
import cartopy.feature as cfea
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
projection=ccrs.PlateCarree()
bbox=[-180,180,-60,85];creg='glob'
mplt.rc('xtick', labelsize=9)
mplt.rc('ytick', labelsize=9)
nc = Dataset('test.nc','r')
lat = nc.variables['lat'][:]
lon = nc.variables['lon'][:]
kopi= (nc.variables['Band1'][:,:])
nc.close()
fig=plt.figure(figsize=(11,5))
ax=fig.add_subplot(1,1,1,projection=projection)
ax.set_extent(bbox,projection)
ax.add_feature(cfea.COASTLINE,lw=.5)
ax.add_feature(cfea.RIVERS,lw=.5)
ax.add_feature(cfea.BORDERS, linewidth=0.6, edgecolor='dimgray')
ax.background_patch.set_facecolor('.9')
levels=[1,4,8,11,14,17,21,25,29]
cmap=plt.cm.BrBG
norm=mplt.colors.BoundaryNorm(levels,cmap.N)
ddlalo=.25
pc=ax.contourf(lon,lat,kopi,levels=levels,transform=projection,cmap=cmap,norm=norm,extend='both')
divider = make_axes_locatable(ax)
ax_cb = divider.new_horizontal(size="3%", pad=0.1, axes_class=plt.Axes)
fig.colorbar(pc,extend='both', cax=ax_cb)
fig.add_axes(ax_cb)
fig.colorbar(pc,extend='both', cax=ax_cb)
ttitle='Jony'
ax.set_title(ttitle,loc='left',fontsize=9)
plt.show()
However, I would like just to plot a selection of values inside the variable ´Band1´. I thought I could use the following code:
kopi= (nc.variables['Band1'][:,:])<=3
However it does not work and instead of plotting the area corresponding to the value selection it selected the all map.
How could I select and plot a desired range of values inside the variables ´Band1´?
Just mask the values with np.nan
kopi[kopi <=3] = np.nan
This should yield to white pixels in your plot.
Please provide test data in the future.
Code is below
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
import seaborn as sns
df = pd.DataFrame(np.random.rand(10,3), columns=["A", "B","C"])
km = KMeans(n_clusters=3).fit(df)
df['cluster_id'] = km.labels_
test = {0:"Blue", 1:"Red", 2:"Green"}
#sns.scatterplot()
plt.show()
I am trying to plot without x,y that is column constraints. I need to plot any number of columns just want to plot the cluster graph
I have a scatter plot im working with and for some reason im not seeing all the x values on my graph
#%%
from pandas import DataFrame, read_csv
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
file = r"re2.csv"
df = pd.read_csv(file)
#sns.set(rc={'figure.figsize':(11.7,8.27)})
g = sns.FacetGrid(df, col='city')
g.map(plt.scatter, 'type', 'price').add_legend()
This is an image of a small subset of my plots, you can see that Res is displaying, the middle bar should be displaying Con and the last would be Mlt. These are all defined in the type column from my data set but are not displaying.
Any clue how to fix?
Python is doing what you tell it to do. Just pick different features, presumably things that make more sense for plotting, if you want to generate a more interesting plots. See this generic example below.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style="darkgrid")
tips = sns.load_dataset("tips")
sns.relplot(x="total_bill", y="tip", hue="smoker", data=tips);
Personally, I like plotly plots, which are dynamic, more than I like seaborn plots.
https://plotly.com/python/line-and-scatter/
I am unable to get regression line and the variance bounds around it while plotting seaborn.pairplot with kind=reg as shown in the examples at http://seaborn.pydata.org/generated/seaborn.pairplot.html
import pandas pd
import seaborn as sns
import numpy as np
import matplotlib as plt
# Preparing random dataFrame with two colums, viz., random x and lag-1 values
lst1 = list(np.random.rand(10000))
df = pd.DataFrame({'x1':lst1})
df['x2'] = df['x1'].shift(1)
df = df[df['x2'] > 0]
# Plotting now
pplot = sns.pairplot(df, kind="reg")
pplot.set(ylim=(min(df['x1']), max(df['x1'])))
pplot.set(xlim=(min(df['x1']), max(df['x1'])))
plt.show()
The regression line is there, you just don't see it, because it's hidden by the unnaturally high number of points in the plot.
So let's reduce the number of points and you'll see the regression as expected.
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
# Preparing random dataFrame with two colums, viz., random x and lag-1 values
lst1 = list(np.random.rand(100))
df = pd.DataFrame({'x1':lst1})
df['x2'] = df['x1'].shift(1)
df = df[df['x2'] > 0]
# Plotting now
pplot = sns.pairplot(df, kind="reg")
plt.show()