A spike map (as shown in the image below, implemented with D3.js) is a method for displaying differences in the magnitude of a certain discrete, abruptly changing phenomenon such as counts of people.
Is there a package I could use (or example code I could follow) to create a static spike map, similar to the map shown above, in Python? e.g. Matplotlib
You could try with a Ridge Plot. It's not exactly the same, but maybe it can work for you. The implementation in seaborn looks like this:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
# Create the data
rs = np.random.RandomState(1979)
x = rs.randn(500)
g = np.tile(list("ABCDEFGHIJ"), 50)
df = pd.DataFrame(dict(x=x, g=g))
m = df.g.map(ord)
df["x"] += m
# Initialize the FacetGrid object
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=15, height=.5, palette=pal)
# Draw the densities in a few steps
g.map(sns.kdeplot, "x",
bw_adjust=.5, clip_on=False,
fill=True, alpha=1, linewidth=1.5)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_adjust=.5)
g.map(plt.axhline, y=0, lw=2, clip_on=False)
# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
ax = plt.gca()
ax.text(0, .2, label, fontweight="bold", color=color,
ha="left", va="center", transform=ax.transAxes)
g.map(label, "x")
# Set the subplots to overlap
g.fig.subplots_adjust(hspace=-.25)
# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)
plt.show()
And creates the following graph
Related
I want to make boxplots with hues but I want to color code it so that each specific X string is a certain color with the hue just being a lighter color. I am able to do a boxplot without a hue. When I incorporate the hue, I get the second boxplot which loses the colors. Can someone help me customize the colors for the figure that contains the hue?
Essentially, its what the answer for this question is but with boxplots.
This is my code:
first boxplot
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax,palette=color_dict)
sns.stripplot(ax=ax,y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs,palette=color_dict,
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
second boxplot
order=['Ash1','E1A','FUS','p53']
colors=['gold','teal','darkorange','royalblue']
color_dict=dict(zip(order,colors))
fig,ax=plt.subplots(figsize=(25,15))
bp=sns.boxplot(data=df_idrs, x=df_idrs["construct"], y=df_idrs['Norm_Ef_IDR/Ef_GS'],ax=ax, hue=df_idrs["location"])
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs["location"],
jitter=1, marker='o', alpha=0.4,edgecolor='black',linewidth=1, dodge=True)
ax.axhline(y=1,linestyle="--",color='black',linewidth=2)
plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))
The only thing that changed was the palette to hue. I have seen many examples on here but I am unable to get them to work. Using the second code, I have tried the following:
Nothing happens for this one.
for ind, bp in enumerate(ax.findobj(PolyCollection)):
rgb = to_rgb(colors[ind // 2])
if ind % 2 != 0:
rgb = 0.5 + 0.5 * np.array(rgb) # make whiter
bp.set_facecolor(rgb)
I get index out of range for the following one.
for i in range(0,4):
mybox = bp.artists[i]
mybox.set_facecolor(color_dict[order[i]])
Matplotlib stores the boxes in ax.patches, but there are also 2 dummy patches (used to construct the legend) that need to be filtered away. The dots of the stripplot are stored in ax.collections. There are also 2 dummy collections for the legend, but as those come at the end, they don't form a problem.
Some remarks:
sns.boxplot returns the subplot on which it was drawn; as it is called with ax=ax it will return that same ax
Setting jitter=1in the stripplot will smear the dots over a width of 1. 1 is the distance between the x positions, and the boxes are only 0.4 wide. To avoid clutter, the code below uses jitter=0.4.
Here is some example code starting from dummy test data:
from matplotlib import pyplot as plt
from matplotlib.legend_handler import HandlerTuple
from matplotlib.patches import PathPatch
from matplotlib.colors import to_rgb
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(20230215)
order = ['Ash1', 'E1A', 'FUS', 'p53']
colors = ['gold', 'teal', 'darkorange', 'royalblue']
hue_order = ['A', 'B']
df_idrs = pd.DataFrame({'construct': np.repeat(order, 200),
'Norm_Ef_IDR/Ef_GS': (np.random.normal(0.03, 1, 800).cumsum() + 10) / 15,
'location': np.tile(np.repeat(hue_order, 100), 4)})
fig, ax = plt.subplots(figsize=(12, 5))
sns.boxplot(data=df_idrs, x=df_idrs['construct'], y=df_idrs['Norm_Ef_IDR/Ef_GS'], hue='location',
order=order, hue_order=hue_order, ax=ax)
box_colors = [f + (1 - f) * np.array(to_rgb(c)) # whiten colors depending on hue
for c in colors for f in np.linspace(0, 0.5, len(hue_order))]
box_patches = [p for p in ax.patches if isinstance(p, PathPatch)]
for patch, color in zip(box_patches, box_colors):
patch.set_facecolor(color)
sns.stripplot(y='Norm_Ef_IDR/Ef_GS', x='construct', data=df_idrs, hue=df_idrs['location'],
jitter=0.4, marker='o', alpha=0.4, edgecolor='black', linewidth=1, dodge=True, ax=ax)
for collection, color in zip(ax.collections, box_colors):
collection.set_facecolor(color)
ax.axhline(y=1, linestyle='--', color='black', linewidth=2)
handles = [tuple(box_patches[i::len(hue_order)]) for i in range(len(hue_order))]
ax.legend(handles=handles, labels=hue_order, title='hue category',
handlelength=4, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
loc='upper left', bbox_to_anchor=(1.01, 1))
plt.tight_layout()
plt.show()
I am currently making a plot on matplotlib, which looks like below.
The code for which is:
fig, ax1 = plt.subplots(figsize=(20,5))
ax2 = ax1.twinx()
# plt.subplots_adjust(top=1.4)
ax2.fill_between(dryhydro_df['Time'],dryhydro_df['Flow [m³/s]'],0,facecolor='lightgrey')
ax2.set_ylim([0,10])
AB = ax2.fill_between(dryhydro_df['Time'],[12]*len(dryhydro_df['Time']),9.25,facecolor=colors[0],alpha=0.5,clip_on=False)
ab = ax2.scatter(presence_df['Datetime'][presence_df['AB']==True],[9.5]*sum(presence_df['AB']==True),marker='X',color='black')
# tidal heights
ax1.plot(tide_df['Time'],tide_df['Tide'],color='dimgrey')
I want the blue shaded region and black scatter to be above the plot. I can move the elements above the plot by using clip_on=False but I think I need to extend the space above the plot to do visualise it. Is there a way to do this? Mock-up of what I need is below:
You can use clip_on=False to draw outside the main plot. To position the elements, an xaxis transform helps. That way, x-values can be used in the x direction, while the y-direction uses "axes coordinates". ax.transAxes() uses "axes coordinates" for both directions.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
dates = pd.date_range('2018-07-01', '2018-07-31', freq='H')
xs = dates.to_numpy().astype(float)
ys = np.sin(xs * .091) * (np.sin(xs * .023) ** 2 + 1)
fig, ax1 = plt.subplots(figsize=(20, 5))
ax1.plot(dates, ys)
ax1.scatter(np.random.choice(dates, 10), np.repeat(1.05, 10), s=20, marker='*', transform=ax1.get_xaxis_transform(),
clip_on=False)
ax1.plot([0, 1], [1.05, 1.05], color='steelblue', lw=20, alpha=0.2, transform=ax1.transAxes, clip_on=False)
plt.tight_layout() # fit labels etc. nicely
plt.subplots_adjust(top=0.9) # make room for the additional elements
plt.show()
I have written my code to create a scatter plot with a color bar on the right. But the color bar does not look right, in the sense that the color is too light to be mapped to the actual color used in the plot. I am not sure what is missing or wrong here. But I am hoping to get something similar to what's shown here: https://medium.com/#juliansteam/what-bert-topic-modelling-reveal-about-the-2021-unrest-in-south-africa-d0d15629a9b4 (about in the middle of the page)
df = .... # data loading
df["topic"] = topics
# Plot parameters
top_n = topn
fontsize = 15
# some data preparation
to_plot = df.copy()
to_plot[df.topic >= top_n] = -1
outliers = to_plot.loc[to_plot.topic == -1]
non_outliers = to_plot.loc[to_plot.topic != -1]
#the actual plot
fig, ax = plt.subplots(figsize=(15, 15))
scatter_outliers = ax.scatter(outliers['x'], outliers['y'], color="#E0E0E0", s=1, alpha=.3)
scatter = ax.scatter(non_outliers['x'], non_outliers['y'], c=non_outliers['topic'], s=1, alpha=.3, cmap='hsv_r')
ax.text(0.99, 0.01, f"BERTopic - Top {top_n} topics", transform=ax.transAxes, horizontalalignment="right", color="black")
plt.xticks([], [])
plt.yticks([], [])
plt.colorbar(scatter)
plt.savefig(outfile+"_1.png", format='png', dpi=300)
plt.clf()
plt.close()
As you can see, an example plot looks like this. The color bar is created, but compared to that shown in the link above, the color is very light and does not seem to map to those on the scatter plot. Any suggestions?
The colorbar uses the given alpha=.3. In the scatterplot, many dots with the same color are superimposed, causing them to look brighter than a single dot.
One way to tackle this, is to create a ScalarMappable object to be used by the colorbar, taking the colormap and the norm of the scatter plot (but not its alpha). Note that simply changing the alpha of the scatter object (scatter.set_alpha(1)) would also change the plot itself.
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
import numpy as np
x = np.random.normal(np.repeat(np.random.uniform(0, 20, 10), 1000))
y = np.random.normal(np.repeat(np.random.uniform(0, 10, 10), 1000))
c = np.repeat(np.arange(10), 1000)
scatter = plt.scatter(x, y, c=c, cmap='hsv_r', alpha=.3, s=3)
plt.colorbar(ScalarMappable(cmap=scatter.get_cmap(), norm=scatter.norm))
plt.tight_layout()
plt.show()
I'm trying to use Seaborn Pair Grid to make a correlogram with scatterplots in one half, histograms on the diagonal and the pearson coefficient on the other half. I've managed to put together the following code which does what I need, but I'm really struggling with further customization
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
df = sns.load_dataset('iris')
def reg_coef(x,y,label=None,color=None,**kwargs):
ax = plt.gca()
r,p = pearsonr(x,y)
ax.annotate('{:.2f}'.format(r), xy=(0.5,0.5), xycoords='axes fraction', ha='center',fontsize=30,
bbox={'facecolor': 'red', 'alpha': 0.5, 'pad': 20})
ax.set_axis_off()
sns.set(font_scale=1.5)
sns.set_style("white")
g = sns.PairGrid(df)
g.map_diag(plt.hist)
g.map_lower(plt.scatter)
g.map_upper(reg_coef)
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Iris Correlogram', fontsize=30)
plt.show()
This is the result
What I'd like to do:
Change the font used for the whole plot and assign my own defined rgb colour to the font and axes (same one)
Remove the X & Y tick labels
Change the colour of the scatter dots and histogram bars to my own defined rgb colour (same one)
Set a diverging colour map for the background of the pearson number to highlight the degree and type of correlation, again using my own defined rgb colours.
I know Im asking a lot but Ive spent hours going round in circles trying to figure this out!!
The color can be set as extra parameter in g.map_diag(plt.hist, color=...) and
g.map_lower(plt.scatter, color=...). The function reg_coef can be modified to take a colormap into account.
The font color and family can be set via the rcParams. The ticks can be removed via plt.setp(g.axes, xticks=[], yticks=[]). Instead of subplot_adjust, g.fig.tight_layout() usually fits all elements nicely into the plot. Here is an example:
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
def reg_coef(x, y, label=None, color=None, cmap=None, **kwargs):
ax = plt.gca()
r, p = pearsonr(x, y)
norm = plt.Normalize(-1, 1)
cmap = cmap if not cmap is None else plt.cm.coolwarm
ax.annotate(f'{r:.2f}', xy=(0.5, 0.5), xycoords='axes fraction', ha='center', fontsize=30,
bbox={'facecolor': cmap(norm(r)), 'alpha': 0.5, 'pad': 20})
ax.set_axis_off()
df = sns.load_dataset('iris')
sns.set(font_scale=1.5)
sns.set_style("white")
for param in ['text.color', 'axes.labelcolor', 'xtick.color', 'ytick.color']:
plt.rcParams[param] = 'cornflowerblue'
plt.rcParams['font.family'] = 'cursive'
g = sns.PairGrid(df, height=2)
g.map_diag(plt.hist, color='turquoise')
g.map_lower(plt.scatter, color='fuchsia')
g.map_upper(reg_coef, cmap=plt.get_cmap('PiYG'))
plt.setp(g.axes, xticks=[], yticks=[])
g.fig.suptitle('Iris Correlogram', fontsize=30)
g.fig.tight_layout()
plt.show()
I am trying to generate multi-panel figure using seaborn in python and I want the color of the points in my multi-panel figure to be specified by a continuous variable. Here's an example of what I am trying to do with the "iris" dataset:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
iris = sns.load_dataset('iris')
g = sns.FacetGrid(iris, col = 'species', hue = 'petal_length', palette = 'seismic')
g = g.map(plt.scatter, 'sepal_length', 'sepal_width', s = 100, alpha = 0.5)
g.add_legend()
This makes the following figure:
Which is nice, but the legend is way too long. I'd like to sample out like 1/4 of these values (ideally) or barring that display a colorbar instead.
For instance, something like this might be acceptable, but I'd still want to split it over the three species.
plt.scatter(iris.sepal_length, iris.sepal_width, alpha = .8, c = iris.petal_length, cmap = 'seismic')
cbar = plt.colorbar()
Any idea about how I can get the best of both of these plots?
Edit:
This topic seems like a good start.
https://github.com/mwaskom/seaborn/issues/582
Somehow, for this user, simply appending plt.colorbar after everything else ran seemed to somehow work. Doesn't seem to help in this case though.
The FacetGrid hue is categorical, not continuous. It will require a little bit of work to get a continuous colormap for a scatterplot in the FacetGrid (unlike with imshow in the linked Github issue, matplotlib does not keep a reference to the "currently active scatterplot mapper" so that a magic call to plt.colorbar doesn't pick up the mapping applied to the point colors).
g = sns.FacetGrid(iris, col='species', palette = 'seismic')
def facet_scatter(x, y, c, **kwargs):
"""Draw scatterplot with point colors from a faceted DataFrame columns."""
kwargs.pop("color")
plt.scatter(x, y, c=c, **kwargs)
vmin, vmax = 0, 7
cmap = sns.diverging_palette(240, 10, l=65, center="dark", as_cmap=True)
g = g.map(facet_scatter, 'sepal_length', 'sepal_width', "petal_length",
s=100, alpha=0.5, vmin=vmin, vmax=vmax, cmap=cmap)
# Make space for the colorbar
g.fig.subplots_adjust(right=.92)
# Define a new Axes where the colorbar will go
cax = g.fig.add_axes([.94, .25, .02, .6])
# Get a mappable object with the same colormap as the data
points = plt.scatter([], [], c=[], vmin=vmin, vmax=vmax, cmap=cmap)
# Draw the colorbar
g.fig.colorbar(points, cax=cax)
Since you were asking about a legend for the scatter, one may adapt #mwaskom's solution to produce a legend with scatter points like so:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
iris = sns.load_dataset('iris')
g = sns.FacetGrid(iris, col='species', palette = 'seismic')
def facet_scatter(x, y, c, **kwargs):
kwargs.pop("color")
plt.scatter(x, y, c=c, **kwargs)
vmin, vmax = 0, 7
cmap = plt.cm.viridis
norm=plt.Normalize(vmin=vmin, vmax=vmax)
g = g.map(facet_scatter, 'sepal_length', 'sepal_width', "petal_length",
s=100, alpha=0.5, norm=norm, cmap=cmap)
# Make space for the colorbar
g.fig.subplots_adjust(right=.9)
lp = lambda i: plt.plot([], color=cmap(norm(i)), marker="o", ls="", ms=10, alpha=0.5)[0]
labels = np.arange(0,7.5,0.5)
h = [lp(i) for i in labels]
g.fig.legend(handles=h, labels=labels, fontsize=9)
plt.show()