Pandas doesn't show scatter matrix - python

I don't see a scatter matrix if I run the following code in Visual Studio Code:
import numpy as np
import pandas as pd
df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
pd.plotting.scatter_matrix(df, alpha=0.2)
There is also no error message in the command line.

You want a scatter matrix according to the question title, but you use plot in the code. Could you try this?
import numpy as np
import pandas as pd
from pandas.plotting import scatter_matrix
df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
scatter_matrix(df, alpha=0.2)
If you still don't see the scatter matrix, then,
following @medium-dimensional's suggestion, you have to import pyplot and call plt.show():
import numpy as np
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
scatter_matrix(df, alpha=0.2)
plt.show()

Related

Python: Why does my chart disappear when using matplotlib.pyplot.figure(figsize)

When rendering matplotlib charts using pyscript and using figure(figsize) the chart disappears and replaces the x and y axes. Why is this happening?
With figsize
Without figsize
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pyodide.http import open_url
url = open_url('../data/salaries.csv')
df = pd.read_csv(url)
df_no_outliers=df[(df.salary_in_usd>min) & (df.salary_in_usd<max)]
top_jobs=df_no_outliers.job_title.value_counts()[:7]
plt.figure(2)
barplot = sns.barplot(x='work_year', y='salary_in_usd', data=df_no_outliers)
plt.tight_layout()
plt.figure(figsize=(15,8))
plt

Creating scatter plot

Can someone help me create a scatter plot? I have written the following code; however, the resulting scatter plot (linked) is not what I expected, as all the data are concentrated on only 3 values of the x-variable.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from scipy.stats import skew
from warnings import filterwarnings
filterwarnings('ignore')
df_transactions = pd.read_csv('transactions.csv')
daily_revenue= df_transactions.groupby("days_after_open").sum()['revenue']
df_transactions["daily_revenue"] = daily_revenue
x = df_transactions["days_after_open"]
y = df_transactions["daily_revenue"]
plt.scatter(x,y,alpha=0.2)
plt.xlabel("Days After Open (days)")
plt.ylabel("Daily Reveue ($)")
plt.savefig("plot")
dataframe image
Please define 'daily_revenue' as follows before moving on to the scatter plot:
y = df_transactions["daily_revenue"]

How to display Legend with seaborn.kdeplot

I have been trying to add a legend to my code below.
It should have worked once I added the "label" argument, but the legend just won't show, and I'm not sure what I did wrong.
Packages Used
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from scipy.stats import kruskal
from sklearn.datasets import load_iris
Df1 = pd.read_csv(r"C:\Users\pc admin\Desktop\SUTD Programming\Data Wrangling\Personal Assigment\IBM Data.csv", header=0)
plt.figure(figsize=(20,8))
plt.style.use('seaborn-colorblind')
plt.grid(True, alpha=0.5)
sns.kdeplot(Df1.loc[Df1['Attrition'] == 'No', 'JobSatisfaction'], **label = "Previous-Employee"**)
sns.kdeplot(Df1.loc[Df1['Attrition'] == 'Yes', 'JobSatisfaction'], **label ="Current-Employees"**)
plt.xlabel('JobSatisfaction')
plt.xlim(left=0)
plt.ylabel('Density')
plt.title('Distance From Home Distribution in Percent by Attrition Status');
You simply need to call the .legend() method of your Axes object. The plotting functions of seaborn return the reference to the Axes directly which is handy. See the documentation of sns.kdeplot
ax = sns.kdeplot(...)
ax.legend(loc="upper right")
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from scipy.stats import kruskal
from sklearn.datasets import load_iris
import matplotlib.patches as mpatches
Df1 = pd.read_csv(
r"C:\Users\pc admin\Desktop\SUTD Programming\Data
Wrangling\Personal Assigment\IBM Data.csv", header=0)
plt.figure(figsize=(20, 8))
plt.style.use('seaborn-colorblind')
plt.grid(True, alpha=0.5)
sns.kdeplot(Df1.loc[Df1['Attrition'] == 'No',
'JobSatisfaction'], **label="Previous-Employee"**)
sns.kdeplot(Df1.loc[Df1['Attrition'] == 'Yes',
'JobSatisfaction'], **label="Current-Employees"**)
plt.xlabel('JobSatisfaction')
plt.xlim(left=0)
plt.ylabel('Density')
plt.title('Distance From Home Distribution in Percent
by Attrition Status')
handles = [mpatches.Patch(facecolor=plt.cm.Reds(100),
label="Active Employee"),
mpatches.Patch(facecolor=plt.cm.Blues(100),
label="Ex employee")]
plt.legend(handles=handles)
# Choose whatever colours you want; hope that helps you
# out :)
# legends was deprecated in python 3 if you have an object
# of sns.kdeplot

How to print the heatmap in a square shape using seaborn?

When I run the code below, I notice that the heatmap does not have a square shape. I have used square=True, but it did not work! Any idea how I can print the heatmap in a square format? Thank you!
The code:
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib as plt
import os
import seaborn as sns
temp_hourly_A5_A7_AX_ASHRAE=pd.read_csv('C:\\Users\\cvaa4\\Desktop\\projects\\s\\temp_hourly_A5_A7_AX_ASHRAE.csv',index_col=0, parse_dates=True, dayfirst=True, skiprows=2)
sns.heatmap(temp_hourly_A5_A7_AX_ASHRAE,cmap="YlGnBu", vmin=18, vmax=27, square=True, cbar=False, linewidth=0.0001);
The result:
square=True should work to produce square cells; below is a working example:
import pandas as pd
import numpy as np
import seaborn as sns
df = pd.DataFrame(np.tile([0,1], 15*15).reshape(-1,15))
sns.heatmap(df, square=True)
If, however, you want the plot as a whole to have a square shape, you can use set_aspect together with the shape of the data:
ax = sns.heatmap(df)
ax.set_aspect(df.shape[1]/df.shape[0]) # here 0.5 Y/X ratio
You can use matplotlib and set a figsize before plotting the heatmap.
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
rnd = np.random.default_rng(12345)
data = rnd.uniform(-100, 100, [100, 50])
plt.figure(figsize=(6, 5))
sns.heatmap(data, cmap='viridis');
Note that I used figsize=(6, 5) rather than a square figsize=(5, 5). This is because, within the given figsize, seaborn also places the colorbar, which might cause the heatmap to be squished a bit. You might want to change those figsizes too, depending on what you need.

seaborn mixing of plots

I'm having trouble creating this plot in Spyder:
import seaborn as sns
import pandas as pd
from pandas.api.types import CategoricalDtype
diamonds= sns.load_dataset("diamonds")
df=diamonds.copy()
cut_Kategoriler=["Fair","Good","Very Good","Premium","Ideal"]
df.cut=df.cut.astype(CategoricalDtype(categories = cut_Kategoriler,ordered=True))
print(df.head())
sns.catplot(x="cut",y="price",data=df)
sns.barplot(x="cut",y="price",hue="color",data=df)
I want to create two plots, but these plots overlap. How can I separate the graphics produced by the last two lines?
You need to import matplotlib.pyplot as plt and then add plt.show() after each of the two plots.
The modified code is added below:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt # Import Matplotlib
from pandas.api.types import CategoricalDtype
diamonds = sns.load_dataset("diamonds")
df=diamonds.copy()
cut_Kategoriler=["Fair","Good","Very Good","Premium","Ideal"]
df.cut=df.cut.astype(CategoricalDtype(categories = cut_Kategoriler,ordered=True))
print(df.head())
sns.catplot(x="cut",y="price",data=df)
plt.show() # Display the first plot
sns.barplot(x="cut",y="price",hue="color",data=df)
plt.show() # Display the second plot

Categories