I have a scatter plot where both axes are limited to -100 and 100. However, when I graph the data, I always get an unappealing-looking plot that is rectangular and has incorrect axis labels. I'd like the plot to be a square with -100 and 100 as the last axis labels. Does anyone have advice for fixing this formatting issue?
My code is as follows:
import matplotlib.pyplot
import numpy as np
import scipy.stats

# Pearson correlation of the two samples; element 0 of the result is the
# coefficient that is printed on the plot below.
r = scipy.stats.pearsonr(x_val, y_val)

fig, ax = matplotlib.pyplot.subplots()
ax.scatter(x_val, y_val, s=75, color='green', edgecolor='black', linewidth=2, alpha=0.4)
ax.set_axisbelow(True)  # keep the grid lines underneath the markers

# Reference lines through the origin and a light background grid.
matplotlib.pyplot.axvline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.axhline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.grid(linewidth=1, color='#bfbfbf')

# Pin both axes to exactly [-100, 100] and force a 1:1 data aspect ratio so
# the plotting area is drawn as a square instead of following the figure's
# rectangular default.
ax.set_xlim(-100, 100)
ax.set_ylim(-100, 100)
ax.set_aspect('equal', adjustable='box')

# np.arange excludes its stop value, so the stop has to lie beyond 100 for
# the final tick (100) to be generated.
ticks = np.arange(-100, 101, 20.0)
matplotlib.pyplot.xticks(ticks, fontsize=14, fontweight='bold',
                         fontname='Helvetica')
matplotlib.pyplot.yticks(ticks, fontsize=14, fontweight='bold',
                         fontname='Helvetica')

matplotlib.pyplot.text(-95, 85, 'Pearson\'s r: %.3f' % r[0], fontsize=14, fontweight='bold',
                       fontname='Helvetica')
matplotlib.pyplot.show()
Related
It is straightforward to create scatter plot matrices with seaborn's pairplot. Jointplot also allows combining a scatter plot with marginal probability distributions for a single plot.
Although the option diag_kind='kde' lets you plot the probability distributions on the diagonal (useful when x_vars and y_vars are the same), I want to combine both and have marginal probability distributions in a matrix scatter plot. Something like this:
How do I get marginal probability distributions in a matrix scatter plot in seaborn as shown in my screenshot above?
Many thanks to mwaskom for the guidance.
As you suggested, I built my own matplotlib figure and plotted the seaborn plots there guided by this piece of documentation.
def basic_conf(f, a, xin, yin, x, y):
    """Add and style one scatter-panel Axes in cell *a* of figure *f*.

    Parameters
    ----------
    f : figure on which ``add_subplot`` is called.
    a : subplot specification handed straight to ``f.add_subplot``.
    xin : column index of the panel; only column 0 keeps its y tick labels
        and a visible y label.
    yin, x : accepted for a uniform call signature; not used in the body.
    y : text used for the y-axis label.

    Returns
    -------
    The newly created Axes.
    """
    panel = f.add_subplot(a)
    panel.tick_params(axis='both', which='major', labelsize=10)
    for side in ("right", "top"):
        panel.spines[side].set_visible(False)

    # NOTE(review): the source lost its indentation; the conditional is read
    # as covering only the two "hide" calls below - confirm against the
    # original post.
    if xin != 0:
        panel.set_yticklabels([])
        hidden_ylabel = panel.set_ylabel(" ", fontsize=0)
        hidden_ylabel.set_visible(False)
    # The label text is assigned for every panel; where the label was hidden
    # just above, it remains invisible.
    panel.set_ylabel(y, fontsize=10)

    # Scatter panels never show x tick labels or an x label.
    panel.set_xticklabels([])
    hidden_xlabel = panel.set_xlabel(" ", fontsize=0)
    hidden_xlabel.set_visible(False)
    return panel
def xhist_conf(f, a, x):
    """Add and style an Axes for a histogram drawn along the x direction.

    Parameters
    ----------
    f : figure on which ``add_subplot`` is called.
    a : subplot specification handed straight to ``f.add_subplot``.
    x : text used for the x-axis label.

    Returns
    -------
    The newly created Axes, with only the bottom spine left visible and the
    y axis stripped of tick labels, tick marks and label.
    """
    panel = f.add_subplot(a)
    for side in ("right", "left", "top"):
        panel.spines[side].set_visible(False)

    # No tick labels and no tick marks on the y axis.
    panel.set_yticklabels([])
    panel.yaxis.set_ticks_position('none')

    panel.set_xlabel(x, fontsize=10)
    hidden_ylabel = panel.set_ylabel(" ")
    hidden_ylabel.set_visible(False)
    return panel
def yhist_conf(f, a, y):
    """Add and style an Axes for a histogram drawn along the y direction.

    Parameters
    ----------
    f : figure on which ``add_subplot`` is called.
    a : subplot specification handed straight to ``f.add_subplot``.
    y : accepted for symmetry with the other helpers; not used in the body.

    Returns
    -------
    The newly created Axes, with only the left spine left visible and both
    axes stripped of tick labels and axis labels.
    """
    panel = f.add_subplot(a)
    panel.tick_params(axis='both', which='major', labelsize=10)
    for side in ("right", "top", "bottom"):
        panel.spines[side].set_visible(False)

    # Blank tick labels on both axes; additionally drop the x tick marks.
    panel.set_xticklabels([])
    panel.set_yticklabels([])
    panel.xaxis.set_ticks_position('none')

    # Both axis labels are set to a blank string and then hidden.
    hidden_xlabel = panel.set_xlabel(" ", fontsize='xx-small')
    hidden_xlabel.set_visible(False)
    hidden_ylabel = panel.set_ylabel(" ", fontsize=0)
    hidden_ylabel.set_visible(False)
    return panel
def includer(ax, x, y):
    """Write the Pearson correlation of two columns onto *ax*.

    The columns named *x* and *y* are read from the module-level
    ``concat_convert`` table. The coefficient is printed with two decimals
    at axes-fraction position (0.1, 0.9), i.e. near the top-left corner.
    """
    # pearsonr returns (coefficient, p-value); only the coefficient is shown.
    r = stats.pearsonr(concat_convert[x], concat_convert[y])[0]
    label = f'ρ = {r:.2f}'
    ax.text(0.1, 0.9, label, transform=ax.transAxes)
# Columns of concat_convert plotted along the horizontal direction of the grid.
x_vars = [
    "$P_{LA}$",
    "$R^{Ao}_P$",
    "$C^{Ao}_P$",
    "$R^{Ao}_S$",
    "$B_{VAD}$",
    "$A_{VAD}$",
    "HR",
    "EF",
]
# Columns of concat_convert plotted along the vertical direction of the grid.
y_vars = [
    "${Q}^{avg}_{M}$",
    "${Q}^{max}_{M}$",
    "${Q}^{avg}_{Ao}$",
    "${Q}^{max}_{Ao}$",
    "${Q}^{avg}_{VAD}$",
    "${Q}^{max}_{VAD}$",
    "$Q_{RAT}$",
]

sns.set(context="paper", font_scale=1.75, style="ticks")
f = plt.figure(figsize=(18, 16), dpi=600)
# 7 scatter rows + 1 row of x histograms; 8 scatter columns + 1 column of
# y histograms.
gs = f.add_gridspec(8, 9)
plt.rcParams['font.size'] = '10'
plt.rcParams['xtick.labelsize'] = '8'

# The marginal histograms live in the row/column directly after the scatter
# panels. Naming these indices avoids depending on the values the loop
# counters happen to hold once the loops have finished.
hist_row = len(y_vars)
hist_col = len(x_vars)

# Keep a handle on every Axes that needs tweaking afterwards. Calling
# f.add_subplot() again with the same grid cell is not a lookup: current
# Matplotlib creates a new, empty Axes on top of the existing panel.
scatter_axes = {}
xhist_axes = {}

with sns.axes_style("ticks"):
    # Scatter + regression panels, one per (y, x) pair.
    for xin, x in enumerate(x_vars):
        for yin, y in enumerate(y_vars):
            ax = basic_conf(f, gs[yin, xin], xin, yin, x, y)
            sns.regplot(ax=ax, data=concat_convert, x=x, y=y, scatter_kws={'s': 4})
            includer(ax, x, y)
            scatter_axes[(yin, xin)] = ax

    # Bottom row: distribution of each x variable.
    for xin, x in enumerate(x_vars):
        ax = xhist_conf(f, gs[hist_row, xin], x)
        sns.histplot(ax=ax, data=concat_convert, x=x, kde=True)
        xhist_axes[xin] = ax

    # Right-most column: distribution of each y variable.
    for yin, y in enumerate(y_vars):
        ax = yhist_conf(f, gs[yin, hist_col], y)
        sns.histplot(ax=ax, data=concat_convert, y=y, kde=True)

# Narrow the x range of every scatter panel in the third column.
for (yin, xin), ax in scatter_axes.items():
    if xin == 2:
        ax.set_xlim((0.001, 0.0014))

# Use scientific notation on the x axis of the first and sixth histograms.
for xin in (0, 5):
    xhist_axes[xin].ticklabel_format(style='sci', scilimits=(0, 0), axis='x')
And it gets me exactly what I want:
Many thanks.
EDIT: Final code snippet and obtained plot.
I have some datasets that I'm visualizing in a scatter plot. I have a bunch of mean values and a global mean. What I'm after, but can't really achieve, is a scatter plot that is centered in the plot while also placing the origin at the global mean.
This is the code that defines the layout of the plot:
# Figure number 1 with a figure-level title.
plt.figure(1).suptitle('Example')

# Label the current Axes.
ax = plt.gca()
ax.set_xlabel('x (pixels)')
ax.set_ylabel('y (pixels)')

# Put the left and bottom spines through the middle of the Axes and make
# the other two invisible.
for side in ('left', 'bottom'):
    ax.spines[side].set_position('center')
for side in ('right', 'top'):
    ax.spines[side].set_color('none')

# One dataset (faint markers) plus a cross marking the global mean.
ax.scatter(x_data, y_data, color=color, alpha=0.08, label=csv_file_name)
mean_x, mean_y = global_mean[0], global_mean[1]
ax.plot(mean_x, mean_y, color='green', marker='x', label='Global mean')
This produces the following plot (the ax.scatter() is called multiple times for each dataset, but it's not in the code above):
I've tried playing around with the ax.set_position() parameters, but nothing has worked well so far. Is there a way to do what I'm after with matplotlib, or do I need to use some other plotting library?
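You can move the spines around by calling set_position() on the entries of ax.spines (ax.spines is a dict-like container of the four spines, not a method).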
import numpy as np
import random
import matplotlib.pyplot as plt
# Sample data: 100 evenly spaced x values in [1, 2] and 100 random y values.
x = np.linspace(1, 2, 100)
y = [random.random() for _ in range(100)]

# Two panels side by side: the untouched plot on the left, the same plot
# with relocated spines on the right.
fig, (ax, ax2) = plt.subplots(1, 2, figsize=(10, 5))

# original plot
ax.scatter(x, y)

# same plot, but with the spines moved
ax2.scatter(x, y)

# ('axes', 0.5) places a spine halfway across the Axes: the left spine
# (y axis) shifts right and the bottom spine (x axis) shifts up.
for side in ('left', 'bottom'):
    ax2.spines[side].set_position(('axes', 0.5))

# the right and top spines are not needed
for side in ('right', 'top'):
    ax2.spines[side].set_visible(False)

plt.show()
I'm plotting a simple scatter plot:
It represents my data correctly; however, there are many data points with coordinates (1.00, 1.00), and in the plot they appear under a single marker (top right corner). I'd like to change the size of every marker according to the number of points it represents. I'd appreciate any help. Here's my code:
def saveScatter(figureTitle, xFeature, yFeature, xTitle, yTitle):
    '''Draw and show a scatter plot of one feature against another.

    Four module-level data frames (module C/D, positives/negatives) are
    plotted on the same Axes. ``xFeature`` and ``yFeature`` are indices into
    the module-level ``names`` sequence, which supplies the column names.
    ``xTitle``/``yTitle`` are the axis labels and ``figureTitle`` the title.

    Despite the name, nothing is written to disk here: the figure is
    displayed with ``plt.show()`` and the Axes is returned.
    '''
    fig = plt.figure(figsize=(8, 6), dpi=300)
    ax = fig.add_subplot(111)

    # (data frame, scatter keyword arguments), in drawing order.
    series = (
        (dfModuleCPositives,
         dict(c='r', marker='x', alpha=1, label='Module C Positives')),
        (dfModuleCNegatives,
         dict(c='g', alpha=0.5, label='Module C Negatives')),
        (dfModuleDPositives,
         dict(c='k', marker='x', alpha=1, label='Module D Positives')),
        (dfModuleDNegatives,
         dict(c='b', alpha=0.5, label='Module D Negatives')),
    )
    for frame, style in series:
        ax.scatter(frame[names[xFeature]][:], frame[names[yFeature]][:], **style)

    ax.set_xlabel(xTitle, fontsize=10)
    ax.set_ylabel(yTitle, fontsize=10)
    ax.set_title(figureTitle)
    ax.grid(True)
    ax.legend(loc="lower right")

    fig.tight_layout()
    plt.show()
    return ax
I have 33960 - 0's and 144 - 1's in data_train['fk_action_code_id'].
When I plot a histogram, the bar for 1 is so small that it is not visible. Is there any way to modify the y-axis so that the bar for 1 becomes visible?
I tried this but it doesn't work
# Attempt described in the question: ask for y ticks at 0, 145 and 35000 and
# then draw the histogram of the action-code column.
# NOTE(review): this only chooses tick locations; it does not change how the
# y axis is scaled, which is presumably why the small bar stays invisible.
b=[0,145, 35000]
plt.yticks(b)
plt.hist(data_train['fk_action_code_id'], histtype='bar', rwidth=0.8)
A few suggestions: you could
1.) create two y axes, one for the zeros and the other for the ones
2.) multiply one of the bars by a numerical factor, so that they are of the same order of magnitude (you should explain this in the plot legend then)
3.) draw a logarithmic histogram with the option log=True in the plt.hist() command.
The following will produce plots for these three options:
import numpy as np
import matplotlib.pyplot as plt
def _finish_figure(title, filename):
    """Add the 0/1 x ticks and a title, then save, show and clear the figure."""
    plt.xticks([0, 1])
    plt.title(title)
    plt.savefig(filename)
    plt.show()
    plt.clf()


# Synthetic, heavily imbalanced sample: 35000 zeros and (a multiple of) 145
# ones. The bin edges put one bin around 0 and one bin around 1.
zeros = np.zeros([35000])
bins = np.asarray([-0.5, 0.5, 1.5])

# Variant: inflate the count of ones by a constant factor.
modifier = 100
ones = np.ones([145 * modifier])
arr = np.hstack((zeros, ones))
plt.hist(arr, bins=bins, facecolor='green', alpha=0.75, log=False)
_finish_figure('Multiplied with a factor', 'multiplied.png')

# Variant: true counts, drawn on a logarithmic y axis.
modifier = 1
ones = np.ones([145 * modifier])
arr = np.hstack((zeros, ones))
plt.hist(arr, bins=bins, facecolor='green', alpha=0.75, log=True)
_finish_figure('Logarithmic', 'log.png')

# Variant: true counts, one y axis per bar. ax1 (left scale) carries the bar
# for the zeros, ax2 (right scale, sharing the x axis) the bar for the ones.
ax1 = plt.gca()
ax2 = ax1.twinx()
for axis, ticks, top in ((ax1, [0, 35000, 40000], 40000),
                         (ax2, [0, 145, 200], 200)):
    axis.set_yticks(ticks)
    axis.set_ylim(0, top)
# Each histogram gets a single bin so that it draws only its own bar.
ax1.hist(arr, bins=[bins[0], bins[1]], facecolor='green', alpha=0.75, log=False)
ax2.hist(arr, bins=[bins[1], bins[2]], facecolor='green', alpha=0.75, log=False)
_finish_figure('Two y axes', 'two_axes.png')
Is it possible to set all labels at once in Matplotlib?
For example, I have this piece of code to plot a scatter plot:
cmap = plt.get_cmap('Set1')
# Sample simulations+1 evenly spaced colours from the colormap, then keep
# only the first `simulations` of them. scatter() needs exactly one colour
# per point; handing it one colour too many is rejected by current
# Matplotlib as a length mismatch for `c`. Slicing (instead of changing the
# linspace count) leaves the colour of every remaining point unchanged.
colors = [cmap(i) for i in numpy.linspace(0, 1, simulations+1)][:simulations]
plt.figure(figsize=(7, 7))
plt.scatter(coords[:, 0], coords[:, 1], marker='o', c=colors, s=50, edgecolor='None')
# No artist carries a label at this point, so this legend has no entries.
plt.legend(loc='lower left',)
where simulations = 7 and coords is a numpy.array with shape (7, 2).
This gives me a plot like that:
If I change the last line for:
# Variant from the question: the whole scatter call is given a single label
# argument (the range object) rather than one label per point.
# NOTE(review): scatter() draws all points as one artist, so presumably the
# legend shows a single entry here - confirm against the screenshot.
plt.scatter(coords[:, 0], coords[:, 1], marker='o', c=colors, s=50, edgecolor='None', label=range(simulations))
plt.legend(loc='lower left')
I get:
I'm wondering whether I have to loop over the points, calling scatter and setting a label for each one, or whether there is some way to do it all at once.
Thank you.
I'm not sure how to do it with a scatter plot. But I'm not sure if there is an advantage to use scatter rather than plot if you want different labels.
How about this?
import numpy as np
import matplotlib.pyplot as plt
# n random points in the unit square.
n = 10
coords = np.random.random((n, 2))
cmap = plt.get_cmap('Set1')

# Draw every point with its own plot() call so that each one becomes a
# separate artist with its own colour and its own legend label.
for i, xy in enumerate(coords):
    plt.plot(*xy, 'o', color=cmap(i / float(n)), label='%i' % i, ms=9, mec='none')

# Leave a margin around the unit square so the legend does not cover points.
plt.axis((-0.5, 1.5, -0.5, 1.5))
plt.legend(loc='lower left', numpoints=1, frameon=False)
plt.show()