Matplotlib help: Formatting a scatter plot to be square

Matplotlib help: Formatting a scatter plot to be square - python

I have a scatter plot where both axes are limited to the range -100 to 100. However, when I graph the data, I always get an unappealing plot that is rectangular and has incorrect axis labels. I'd like the plot to be a square with -100 and 100 as the outermost axis labels. Does anyone have advice for fixing this formatting issue?
My code is as follows:
import scipy.stats
import numpy as np
# Pearson correlation of the two samples; only r[0] is used, in the text label below.
r = scipy.stats.pearsonr(x_val, y_val)
# NOTE(review): this snippet imports only scipy.stats and numpy, so the
# `matplotlib.pyplot` references below assume `import matplotlib.pyplot`
# happens somewhere that is not shown.
fig, ax = matplotlib.pyplot.subplots()
ax.scatter(x_val, y_val, s=75, color='green', edgecolor='black', linewidth = 2, alpha=0.4)
# Keep ticks and grid lines below the plotted artists.
ax.set_axisbelow(True)
# Reference lines through x = 0 and y = 0.
matplotlib.pyplot.axvline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.axhline(0, c='#262626', linewidth=1.5, alpha=0.9)
matplotlib.pyplot.grid(linewidth=1, color='#bfbfbf')
# NOTE(review): np.arange excludes its stop value, so these ticks are
# -100, -80, ..., 80 and 100 is never a tick. Nothing in this snippet sets the
# axis limits or the aspect ratio either.
matplotlib.pyplot.xticks(np.arange(-100, 100, 20.0),fontsize=14, fontweight='bold',
fontname='Helvetica')
matplotlib.pyplot.yticks(np.arange(-100, 100, 20.0),fontsize=14, fontweight='bold',
fontname='Helvetica')
# Annotate the plot with the correlation coefficient, formatted to three decimals.
matplotlib.pyplot.text(-95, 85,'Pearson\'s r: %.3f'%r[0], fontsize=14, fontweight='bold',
fontname='Helvetica')
matplotlib.pyplot.show()

Related

scatterplot matrix with marginal probability distributions in seaborn

It is straightforward to do scatter plot matrices with seaborn pairplot. Jointplot also allows combining scatter plots with marginal probability distributions for a single plot.
Although the option diag_kind='kde' lets you plot the probability distributions on the diagonal (useful when x_vars and y_vars are the same), I want to combine both to have marginal probability distributions in a matrix scatter plot. Something like this:
How do I get marginal probability distributions in a matrix scatter plot in seaborn as shown in my screenshot above?

Many thanks to mwaskom for the guidance.
As you suggested, I built my own matplotlib figure and plotted the seaborn plots there guided by this piece of documentation.
def basic_conf(f,a,xin,yin,x,y):
    """Add and style one scatter/regression cell of the plot matrix.

    Args:
        f: Figure the subplot is added to.
        a: Subplot position, passed straight to ``f.add_subplot``.
        xin: Column index of the cell; every column except 0 has its y tick
            labels removed and its y label hidden.
        yin: Row index of the cell (not used by this function).
        x: Name of the x variable (not used; the x label is hidden here).
        y: Name of the y variable, used as the y-axis label text.

    Returns:
        The newly created Axes.
    """
    ax = f.add_subplot(a)
    ax.tick_params(axis='both', which='major', labelsize=10)
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    # NOTE(review): the snippet's indentation was lost; the `if` body is
    # presumed to be the two "hide the y axis text" calls only — confirm.
    if xin != 0:
        ax.set_yticklabels([])
        ax.set_ylabel(" ", fontsize=0).set_visible(False)
    ax.set_ylabel(y, fontsize=10)
    # Scatter cells never show x tick labels or an x label.
    ax.set_xticklabels([])
    ax.set_xlabel(" ", fontsize=0).set_visible(False)
    return ax
def xhist_conf(f,a,x):
    """Add and style an Axes intended for the histogram of an x variable.

    Args:
        f: Figure the subplot is added to.
        a: Subplot position, passed straight to ``f.add_subplot``.
        x: Text used as the x-axis label.

    Returns:
        The newly created Axes, with only the bottom spine left visible,
        no y tick labels or tick marks, and a hidden y label.
    """
    ax = f.add_subplot(a)
    for side in ("right", "left", "top"):
        ax.spines[side].set_visible(False)
    ax.set_yticklabels([])
    ax.yaxis.set_ticks_position('none')
    ax.set_xlabel(x, fontsize=10)
    ax.set_ylabel(" ").set_visible(False)
    return ax
def yhist_conf(f,a,y):
    """Add and style an Axes intended for the histogram of a y variable.

    Args:
        f: Figure the subplot is added to.
        a: Subplot position, passed straight to ``f.add_subplot``.
        y: Name of the y variable (not used by this function; both axis
            labels are hidden).

    Returns:
        The newly created Axes, with only the left spine left visible and
        all tick labels and axis labels removed or hidden.
    """
    ax = f.add_subplot(a)
    ax.tick_params(axis='both', which='major', labelsize=10)
    for side in ("right", "top", "bottom"):
        ax.spines[side].set_visible(False)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.xaxis.set_ticks_position('none')
    ax.set_xlabel(" ", fontsize='xx-small').set_visible(False)
    ax.set_ylabel(" ", fontsize=0).set_visible(False)
    return ax
def includer(ax,x,y):
    """Write the Pearson correlation of two columns into the top-left of an Axes.

    Args:
        ax: Axes to annotate.
        x: Key of the first column in the module-level ``concat_convert``.
        y: Key of the second column in the module-level ``concat_convert``.
    """
    r, _ = stats.pearsonr(concat_convert[x], concat_convert[y])
    # transAxes places the text in axes-relative coordinates, so it sits at
    # the same spot of every cell regardless of the data range.
    ax.text(0.1, 0.9, f'ρ = {r:.2f}', transform=ax.transAxes)
# Column names of `concat_convert` plotted along the x and y directions of the matrix.
x_vars=["$P_{LA}$", "$R^{Ao}_P$", "$C^{Ao}_P$", "$R^{Ao}_S$", "$B_{VAD}$", "$A_{VAD}$", "HR", "EF"]
y_vars=["${Q}^{avg}_{M}$", "${Q}^{max}_{M}$","${Q}^{avg}_{Ao}$", "${Q}^{max}_{Ao}$", "${Q}^{avg}_{VAD}$", "${Q}^{max}_{VAD}$", "$Q_{RAT}$"]

sns.set(context="paper", font_scale=1.75, style="ticks")
f = plt.figure(figsize=(18, 16), dpi=600)

# One grid row per y variable plus a bottom row for the x histograms, and one
# grid column per x variable plus a right-hand column for the y histograms
# (8 x 9 for the lists above).
hist_row = len(y_vars)
hist_col = len(x_vars)
gs = f.add_gridspec(hist_row + 1, hist_col + 1)

plt.rcParams['font.size'] = '10'
plt.rcParams['xtick.labelsize'] = '8'

# (row, col) -> Axes, so that cells can be adjusted later without calling
# add_subplot on an already occupied grid cell.
# NOTE(review): recent Matplotlib releases create a new, overlapping Axes on
# every add_subplot call rather than returning the existing one — confirm
# against the installed version.
axes = {}

with sns.axes_style("ticks"):
    # Scatter + regression line for every (y, x) pair.
    for xin, x in enumerate(x_vars):
        for yin, y in enumerate(y_vars):
            ax = basic_conf(f, gs[yin, xin], xin, yin, x, y)
            sns.regplot(ax=ax, data=concat_convert, x=x, y=y, scatter_kws={'s': 4})
            includer(ax, x, y)
            axes[yin, xin] = ax

    # Marginal distribution of each x variable along the bottom row.
    for xin, x in enumerate(x_vars):
        ax = xhist_conf(f, gs[hist_row, xin], x)
        sns.histplot(ax=ax, data=concat_convert, x=x, kde=True)
        axes[hist_row, xin] = ax

    # Marginal distribution of each y variable down the right-hand column.
    for yin, y in enumerate(y_vars):
        ax = yhist_conf(f, gs[yin, hist_col], y)
        sns.histplot(ax=ax, data=concat_convert, y=y, kde=True)
        axes[yin, hist_col] = ax

    # Hand-tuned x range for every scatter cell in column 2.
    for yin in range(len(y_vars)):
        axes[yin, 2].set_xlim((0.001, 0.0014))

    # Scientific notation on the x axis of the histograms in columns 0 and 5.
    for xin in (0, 5):
        axes[hist_row, xin].ticklabel_format(style='sci', scilimits=(0, 0), axis='x')
And it gets me exactly what I want:
Many thanks.
EDIT: Final code snippet and obtained plot.

Change coordinates for origin in scatter plot with centred axes

I have some datasets that I'm visualizing in a scatter plot. I have a bunch of mean values, and a global mean. What I'm after, but can't really achieve, is to have a scatter plot that is centered in the plot, while also placing the origin at the global mean.
This is the code that defines the layout of the plot:
plt.figure(1)
plt.suptitle('Example')
plt.xlabel('x (pixels)')
plt.ylabel('y (pixels)')
ax = plt.gca()
# Move the left and bottom spines to the centre and hide the other two.
# NOTE(review): 'center' positions a spine at the middle of the axes box,
# not at any particular data coordinate.
ax.spines['left'].set_position('center')
ax.spines['right'].set_color('none')
ax.spines['bottom'].set_position('center')
ax.spines['top'].set_color('none')
# x_data, y_data, color and csv_file_name are defined in code not shown here.
ax.scatter(x_data, y_data, color=color, alpha=0.08, label=csv_file_name)
# Mark the global mean with a green 'x'.
ax.plot(global_mean[0], global_mean[1], color='green',
marker='x', label='Global mean')
This produces the following plot (the ax.scatter() is called multiple times for each dataset, but it's not in the code above):
I've tried playing around with the ax.set_position() parameters but nothing has worked well so far. Is there a way to do what I'm after with matplotlib, or do I need to use some other plot library?

You can move the spines around by calling set_position() on the entries of ax.spines.
import numpy as np
import random
import matplotlib.pyplot as plt
# Sample data: 100 evenly spaced x values paired with uniform random y values.
x = np.linspace(1, 2, 100)
y = [random.random() for _ in range(100)]

# Two panels side by side: the default layout on the left, the same data with
# relocated spines on the right.
fig, (ax, ax2) = plt.subplots(1, 2, figsize=(10, 5))
ax.scatter(x, y)
ax2.scatter(x, y)

# Place the y axis (left spine) and the x axis (bottom spine) halfway across
# the axes box.
for side in ('left', 'bottom'):
    ax2.spines[side].set_position(('axes', 0.5))

# The remaining two spines are not needed.
for side in ('right', 'top'):
    ax2.spines[side].set_visible(False)

plt.show()

Scatter plot with markers changing size according to number of represented points

I'm plotting a simple scatter plot:
It represents my data correctly; however, there are many data points with coordinates (1.00,1.00), and in the plot they appear under a single marker (top right corner). I'd like functionality that changes the size of every marker according to the number of points it represents. I'd appreciate any help. Here's my code:
def saveScatter(figureTitle, xFeature, yFeature, xTitle, yTitle):
    """Draw and show a scatter plot of one feature against another.

    Four module-level data frames are plotted on the same Axes, each with its
    own colour, marker and legend label. Despite the name, the function only
    displays the figure; it does not write it to disk.

    Args:
        figureTitle: Title placed above the Axes.
        xFeature: Index into the module-level ``names`` selecting the column
            plotted on the x axis.
        yFeature: Index into ``names`` selecting the column plotted on the
            y axis.
        xTitle: Label of the x axis.
        yTitle: Label of the y axis.

    Returns:
        The Axes the data was drawn on.
    """
    fig = plt.figure(figsize=(8, 6), dpi=300)
    ax = fig.add_subplot(111)

    xName = names[xFeature]
    yName = names[yFeature]
    # (data frame, scatter keyword arguments) for each plotted group.
    groups = (
        (dfModuleCPositives, dict(c='r', marker='x', alpha=1, label='Module C Positives')),
        (dfModuleCNegatives, dict(c='g', alpha=0.5, label='Module C Negatives')),
        (dfModuleDPositives, dict(c='k', marker='x', alpha=1, label='Module D Positives')),
        (dfModuleDNegatives, dict(c='b', alpha=0.5, label='Module D Negatives')),
    )
    for frame, style in groups:
        ax.scatter(frame[xName][:], frame[yName][:], **style)

    ax.set_xlabel(xTitle, fontsize=10)
    ax.set_ylabel(yTitle, fontsize=10)
    ax.set_title(figureTitle)
    ax.grid(True)
    ax.legend(loc="lower right")
    fig.tight_layout()
    plt.show()
    return ax

Modifying y-axis in histogram in Pandas matplotlib

I have 33960 - 0's and 144 - 1's in data_train['fk_action_code_id'].
When I plot the histogram, the bar for 1 is so small that it is not visible. Is there any way I can raise the bar for 1 by modifying the y-axis so that it becomes visible?
I tried this but it doesn't work
# Requested y tick positions (presumably chosen to sit near the two class counts).
b=[0,145, 35000]
plt.yticks(b)
# Histogram of the raw 0/1 column; rwidth=0.8 draws each bar at 80% of its bin width.
plt.hist(data_train['fk_action_code_id'], histtype='bar', rwidth=0.8)

A few suggestions: you could
1.) create two y axes, one for the zeros and the other for the ones
2.) multiply one of the bars by a numerical factor, so that they are of the same order of magnitude (you should explain this in the plot legend then)
3.) draw a logarithmic histogram with the option log=True in the plt.hist() command.
The following will produce plots for these three options:
import numpy as np
import matplotlib.pyplot as plt
# Synthetic stand-in for the data: 35000 zeros, plus a block of ones added per plot.
zeros = np.zeros(35000)
# Bin edges giving one unit-wide bin around 0 and one around 1.
bins = np.asarray([-0.5, 0.5, 1.5])

# --- Option 2: inflate the count of ones by a constant factor ---------------
modifier = 100
scaled = np.concatenate((zeros, np.ones(145 * modifier)))
plt.hist(scaled, bins=bins, facecolor='green', alpha=0.75, log=False)
plt.xticks([0, 1])
plt.title('Multiplied with a factor')
plt.savefig('multiplied.png')
plt.show()
plt.clf()

# --- Option 3: true counts on a logarithmic y axis --------------------------
arr = np.concatenate((zeros, np.ones(145)))
plt.hist(arr, bins=bins, facecolor='green', alpha=0.75, log=True)
plt.xticks([0, 1])
plt.title('Logarithmic')
plt.savefig('log.png')
plt.show()
plt.clf()

# --- Option 1: one y axis per class, sharing the x axis ---------------------
ax1 = plt.gca()
ax2 = ax1.twinx()
# Each entry: the axis, its y ticks (the last tick doubles as the upper limit)
# and the edges of the single bin drawn on that axis.
panels = (
    (ax1, [0, 35000, 40000], bins[:2]),
    (ax2, [0, 145, 200], bins[1:]),
)
for axis, ticks, _ in panels:
    axis.set_yticks(ticks)
    axis.set_ylim(0, ticks[-1])
for axis, _, edges in panels:
    axis.hist(arr, bins=edges, facecolor='green', alpha=0.75, log=False)
plt.xticks([0, 1])
plt.title('Two y axes')
plt.savefig('two_axes.png')
plt.show()
plt.clf()

Scatter plot labels in one line - Matplotlib

Is it possible to set all labels at once in Matplotlib?
For example, I have this piece of code to plot a scatter plot:
cmap = plt.get_cmap('Set1')
# Sample the colormap at simulations+1 evenly spaced positions between 0 and 1.
colors = [cmap(i) for i in numpy.linspace(0, 1, simulations+1)]
plt.figure(figsize=(7, 7))
# All points are drawn by this single scatter() call.
plt.scatter(coords[:, 0], coords[:, 1], marker='o', c=colors, s=50, edgecolor='None')
# No label was passed to scatter() above.
plt.legend(loc='lower left',)
where simulations = 7 and coords is a numpy.array with shape (7, 2).
This gives me a plot like that:
If I change the last line for:
# Same scatter() call, now with a single `label` argument covering the whole call.
plt.scatter(coords[:, 0], coords[:, 1], marker='o', c=colors, s=50, edgecolor='None', label=range(simulations))
plt.legend(loc='lower left')
I get:
I'm wondering if I'll have to use a loop to do the scatter and set each label, or if there is some way to do it all at once.
Thank you.

I'm not sure how to do it with a scatter plot. But I'm not sure if there is an advantage to use scatter rather than plot if you want different labels.
How about this?
import numpy as np
import matplotlib.pyplot as plt
# Number of points, and their random (x, y) positions in the unit square.
n = 10
coords = np.random.random((n, 2))
cmap = plt.get_cmap('Set1')

# One plot() call per point, so that every point is a separate artist with
# its own label and therefore its own legend entry.
# NOTE(review): 'Set1' is a qualitative colormap with a small fixed number of
# colours, so nearby values of i/n can map to the same colour — confirm the
# colours are distinct enough for the chosen n.
for i, (x, y) in enumerate(coords):
    plt.plot(x, y, 'o', color=cmap(i/float(n)), label='%i'%i, ms=9, mec='none')

# Fixed limits leave room around the unit square for the legend.
plt.axis((-0.5, 1.5, -0.5, 1.5))
plt.legend(loc='lower left', numpoints=1, frameon=False)
plt.show()

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Matplotlib help: Formatting a scatter plot to be square - python

Related

scatterplot matrix with marginal probability distributions in seaborn

Change coordinates for origin in scatter plot with centred axes

Scatter plot with markers changing size according to number of represented points

Modifying y-axis in histogram in Pandas matplotlib

Scatter plot labels in one line - Matplotlib

Categories

Resources