How to set x axis title and title on seaborn distplot - python

I have a pandas dataframe that looks like this
import pandas as pd
dt = pd.DataFrame({'var':[1,1,1,2,2,3,3,3,3,3]})
And I am creating a dist plot like this:
import seaborn as sns
fig = sns.distplot(dt['var'], norm_hist=False, kde=False, bins=3).get_figure()
And then I am saving this plot to a pdf
from matplotlib.backends.backend_pdf import PdfPages
pdf = PdfPages('foo.pdf')
pdf.savefig(fig, height=10, width=18, dpi=500, bbox_inches='tight', pad_inches=0.5)
plt.close()
How can I change the plot title and the x-axis title?

I think you can use pyplot
Try:
plt.xlabel("x-axis")
plt.title("title")

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
# Draw the histogram of dt['var'] and keep a handle on the figure it lives in.
fig = sns.distplot(dt['var'], norm_hist=False, kde=False, bins=3).get_figure()
# The pyplot calls below act on the current axes, i.e. the one distplot just drew on.
plt.title("something")
plt.xlabel("something")
plt.ylabel("something")
# PdfPages only finalizes foo.pdf when it is closed; the with-block guarantees
# the close happens even if savefig raises.
with PdfPages('foo.pdf') as pdf:
    pdf.savefig(fig, height=10, width=18, dpi=500, bbox_inches='tight', pad_inches=0.5)
plt.close() #if you want to present on jupyter you need to comment this out.

Related

How to create specific plots using Pandas and then store them as PNG files?

So I am trying to create histograms for each specific variable in my dataset and then save it as a PNG file.
My code is as follows:
import pandas as pd
import matplotlib.pyplot as plt
x=combined_databook.groupby('x_1').hist()
x.figure.savefig("x.png")
I keep getting "AttributeError: 'Series' object has no attribute 'figure'"
Use matplotlib to create a figure and axis objects, then tell pandas which axes to plot on using the ax argument. Finally, use matplotlib (or the fig) to save the figure.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Sample Data (3 groups, normally distributed)
df = pd.DataFrame({'gp': np.random.choice(list('abc'), 1000),
'data': np.random.normal(0, 1, 1000)})
fig, ax = plt.subplots()
df.groupby('gp').hist(ax=ax, ec='k', grid=False, bins=20, alpha=0.5)
fig.savefig('your_fig.png', dpi=200)
your_fig.png
Instead of using *.hist() I would use matplotlib.pyplot.hist().
Example :
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
y =[10, 20,30,40,100,200,300,400,1000,2000]
x = np.arange(10)
fig = plt.figure()
ax = plt.subplot(111)
ax.plot(x, y, label='$y = Values')
plt.title('my plot')
ax.legend()
plt.show()
fig.savefig('tada.png')

How to live update Matplotlib plot on top of a background image?

I'm trying to have my matplotlib plot update in real-time as data is added to a CSV file. The plot is of a small geographic location, axes given by longitude and latitude. This is what I have so far:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
df = pd.read_csv("cayugacoords.txt")
BoundaryBox = [-76.5119, -76.5013, 42.4596, 42.4642]
ruh_m = plt.imread('map.png')
fig, ax = plt.subplots(figsize=(8, 7))
ax.scatter(df.longitude, df.latitude, zorder=1, alpha=1, c='r', s=10)
ax.set_title('Cayuga Lake Shore')
ax.set_xlim(BoundaryBox[0], BoundaryBox[1])
ax.set_ylim(BoundaryBox[2], BoundaryBox[3])
ax.imshow(ruh_m, zorder=0, extent=BoundaryBox, aspect='equal')
plt.show()
And this is what shows when I run the code (the three points on the bottom left are already in the CSV file):
Current plot
And here's the background image on its own: Cayuga Lake
I want the map to be regularly updated as new coordinates are added to the CSV file. How can this be done? I've looked into animation tools but I'm having trouble retaining the background image of the map while updating the plot. For reference, the CSV file "cayugacoords.txt" looks like this:
longitude,latitude
-76.51,42.46
-76.511,42.46
-76.5105,42.46
Thank you!
An alternative solution updates only the points on the background image by clearing ax.collections, which removes ALL collections (e.g. scatter points) drawn on the axes. For the sake of demonstration, I plot one coordinate per frame.
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation as animation
df = pd.read_csv("cayugacoords.txt")
BoundaryBox = [-76.5119, -76.5013, 42.4596, 42.4642]
ruh_m = plt.imread('map.png')
fig, ax = plt.subplots(figsize=(8, 7))
ax.set_title('Cayuga Lake Shore')
ax.set_xlim(BoundaryBox[0], BoundaryBox[1])
ax.set_ylim(BoundaryBox[2], BoundaryBox[3])
ax.imshow(ruh_m, zorder=0, extent=BoundaryBox, aspect='equal')
def animate(nframe):
    """Redraw the scatter layer so it shows only the coordinate for one frame.

    Parameters
    ----------
    nframe : int
        Frame number supplied by ``FuncAnimation``; used as the label into
        ``df.longitude`` / ``df.latitude``.

    Returns
    -------
    None
    """
    # Assigning ``ax.collections = []`` raises on current Matplotlib, where the
    # attribute is read-only. Removing each artist has the same effect and works
    # on old and new versions. Iterate over a copy because remove() mutates it.
    for stale in list(ax.collections):
        stale.remove()
    # Only collections were cleared, so the imshow background stays in place.
    ax.scatter(df.longitude[nframe], df.latitude[nframe], zorder=1,
               alpha=1, c='r', s=10)
anim = animation.FuncAnimation(fig, animate, frames=3)
This code worked for me. It seems quite hacky but it works. You can adjust the time.sleep to your liking.
from matplotlib import pyplot as plt
from IPython.display import clear_output
import pandas as pd
import numpy as np
import time
%matplotlib inline
ruh_m = plt.imread('map.png')
BoundaryBox = [-76.5119, -76.5013, 42.4596, 42.4642]
while True:
clear_output(wait=True)
df = pd.read_csv("cayugacoords.txt")
fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(df.longitude, df.latitude, zorder=1, alpha=1, c='r', s=10)
ax.set_title('Cayuga Lake Shore')
ax.set_xlim(BoundaryBox[0], BoundaryBox[1])
ax.set_ylim(BoundaryBox[2], BoundaryBox[3])
ax.imshow(ruh_m, zorder=0, extent=BoundaryBox, aspect='equal')
plt.show()
time.sleep(1E-3)

How to change border color of violin plot in pandas graph?

I want to change the color of lineborder of violinplots.
I can set lines.linewidth to 0 but I want to show borders not to hide them. How to change the color of the border?
sns.set_context("paper", rc={"lines.linewidth": 0.8})
My code is as follows:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import numpy as np
datasets = pd.read_csv("merged.csv", index_col=0);
df = datasets
df.protocol = df.protocol.astype(str)
f, ax = plt.subplots(figsize=(18, 6))
sns.violinplot(x="time",
y="values",
hue="protocol",
data=df,
bw=.5,
scale="count"
)
sns.despine(left=True)
f.suptitle('Title', fontsize=22, fontweight='bold')
ax.set_xlabel("Time",size = 16,alpha=0.7)
ax.set_ylabel("Values",size = 16,alpha=0.7)
ax.set_xticklabels(df.qber, rotation=90)
ax.grid(True)
plt.legend(loc='upper right')
plt.grid(linestyle='--', alpha=0.7)
fig = ax.get_figure()
fig.savefig('time_v.pdf', bbox_inches='tight')
Thank you!
this should be very close to what you're looking for:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import style
import pandas as pd
import numpy as np
def patch_violinplot(palette, n):
    """Set the edge color of every violin body on the current axes.

    The violins are taken to be the ``PolyCollection`` children of
    ``plt.gca()``; the i-th one receives the ``i % n``-th palette color.

    Parameters
    ----------
    palette : passed straight to ``sns.color_palette``.
    n : int
        Number of distinct colors to take from the palette.
    """
    from itertools import cycle
    from matplotlib.collections import PolyCollection
    ax = plt.gca()
    violins = [art for art in ax.get_children() if isinstance(art, PolyCollection)]
    colors = sns.color_palette(palette, n_colors=n)
    # Cycling the palette gives the same colors as repeating it, but does not
    # run out when the violin count is not a multiple of n, and does not
    # divide by zero when n is 0.
    for violin, color in zip(violins, cycle(colors)):
        violin.set_edgecolor(color)
datasets = pd.read_csv("merged.csv", index_col=0);
df = datasets
df.protocol = df.protocol.astype(str)
num_cols = df['protocol'].nunique()
f, ax = plt.subplots(figsize=(18, 6))
sns.violinplot(x="time",
y="values",
hue="protocol",
data=df,
bw=.5,
scale="count",
palette="deep"
)
patch_violinplot("deep", num_cols)
sns.despine(left=True)
f.suptitle('Title', fontsize=22, fontweight='bold')
ax.set_xlabel("Time",size = 16,alpha=0.7)
ax.set_ylabel("Values",size = 16,alpha=0.7)
ax.set_xticklabels(df.qber, rotation=90)
ax.grid(True)
plt.legend(loc='upper right')
plt.grid(linestyle='--', alpha=0.7)
fig = ax.get_figure()
fig.savefig('time_v.pdf', bbox_inches='tight')
The patch_violinplot function came from here.

Python seaborn legends cut off

The figure resulting from the Python code below unfortunately cuts off part of the legends. How can I avoid this? Did I miss a parameter in the sns call or is this due to how I've set up my PyCharm IDE?
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv('gm_2008_region.csv')
df = df.drop('Region', axis=1)
plt.figure()
sns.heatmap(df.corr(), square=True, cmap='RdYlGn')
plt.show()
This is the resulting figure:
The .csv file can be found here.
Try adding plt.subplots_adjust(bottom=0.28) as follows:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv('gm_2008_region.csv')
df = df.drop('Region', axis=1)
plt.figure()
sns.heatmap(df.corr(), square=True, cmap='RdYlGn')
plt.subplots_adjust(bottom=0.28)
plt.show()
Giving you:
You might want to change the figsize of plt.figure such as...
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
df = pd.read_csv('gm_2008_region.csv')
df = df.drop('Region', axis=1)
plt.figure(figsize=(12, 8))
sns.heatmap(df.corr(), square=True, cmap='RdYlGn')
plt.show()

Improve updated speed in Matplotlib

I'd like to dynamically update a matrix of text values using matplotlib's animation function. However, I found that if the data array is too large, the animation becomes very slow. Is there any way to improve it?
from matplotlib import animation
import matplotlib.pyplot as plt
import numpy as np
fig, ax = plt.subplots(figsize=(10,20))
# Animation callback from the question: rebuilds the whole axes on every frame.
# `i` is the frame argument passed by FuncAnimation; it is not used in the body.
def updatefig(i):
# cla() clears the current axes, so the grid, ticks and every text label
# below are recreated from scratch on each call.
plt.cla()
ax.grid()
# Fresh 50x50 random matrix per frame.
data = np.random.rand(50,50)
ax.set_xticks(np.arange(data.shape[1]+1))
ax.set_yticks(np.arange(data.shape[0]+1))
# One plt.text call per cell: 50 * 50 = 2500 text artists per frame.
# NOTE(review): this per-frame rebuild is presumably the slowdown the
# question describes; confirm by profiling.
for y in range(data.shape[0]):
for x in range(data.shape[1]):
plt.text(x + 0.5 , y + 0.5, '%.1f' % data[y, x],horizontalalignment='center',verticalalignment='center',color='b',size = 6)
plt.draw()
anim = animation.FuncAnimation(fig, updatefig,interval=50)
plt.show()
Actually, I want to create a heatmap plot with data values, like the one in the link below, but using annotations is the only way I could figure out.
Heatmap with values
I found a workaround by importing the seaborn module.
But how can I keep the graph from flashing?
from matplotlib import animation
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
fig, ax = plt.subplots(figsize=(10,20))
sns.set()
# Animation callback from the question: draws a new annotated heatmap each frame.
# `i` is the frame argument passed by FuncAnimation; it is not used in the body.
def updatefig(i):
# clf() clears the entire current figure, so the heatmap is rebuilt from
# nothing on every call.
# NOTE(review): this full clear-and-redraw is presumably what makes the
# graph flash; confirm.
plt.clf()
# Fresh 10x10 random matrix per frame.
data = np.random.rand(10,10)
# annot=True writes each cell's value into the cell; the colorbar is disabled.
sns.heatmap(data, annot=True, linewidths=.5,cbar=False)
anim = animation.FuncAnimation(fig, updatefig,interval=50)
plt.show()

Categories