Interactive overlay of multiple histograms in matplotlib - python

I have multiple plots to show that overlap each other.
import matplotlib.pyplot as plt
a = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
b = [5,4,4,3,3,3,2,2,2,2,1,1,1,1,1]
c = [4,5,5,6,6,7,7,7,8,8,8,9,9,10,10]
x_min, x_max = min(a + b + c), max(a + b + c)
plt.hist(a, range=(x_min,x_max), bins = 10, alpha=0.5, label="a")
plt.hist(b, range=(x_min,x_max), bins = 10, alpha=0.5, label="b")
plt.hist(c, range=(x_min,x_max), bins = 10, alpha=0.5, label="c")
plt.legend()
plt.show()
Is it possible for me to generate all the individual plots in one step, then allow the user to interactively choose which to overlay in a second step?
In this example, a correct solution would have three interactive check boxes (one fore each plot). Because there are 3 check boxes, there are 2^3=8 possible ways the user could specify the plot.

Obviously, you have to write your own function. Matplotlib hist() returns a BarContainer - a fancy name for a list of rectangle objects, i.e, the bars of the histogram. We can set the visibility of each rectangle like we can set the visibility of each line in a line plot. An implementation therefore could look like this:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.widgets import CheckButtons
def select_plots(list_of_inputs, labels=list("abcdefghijklmn"), bins=10):
#list_of_input and labels must have the same length
nr_of_hists=len(list_of_inputs)
labels = labels[:nr_of_hists]
x_min, x_max = min(min(l) for l in list_of_inputs), max(max(l) for l in list_of_inputs)
#collect list of barcontainers generated by hist plots
barcontainers = []
fig, ax = plt.subplots(figsize=(10, 8))
for label, list in zip(labels, list_of_inputs):
_, _, curr_barcontainer = ax.hist(list, range=(x_min,x_max), bins=bins, alpha=0.5, label=label)
barcontainers.append(curr_barcontainer)
#create checkboxes and color them to correspond with the plot
ax_chkbox = plt.axes([0.8, 0.9-0.05*nr_of_hists, 0.1, 0.05*nr_of_hists])
check_boxes = CheckButtons(ax_chkbox, labels, np.ones(nr_of_hists, dtype=bool))
handles, _ = ax.get_legend_handles_labels()
for rect, handle in zip(check_boxes.rectangles, handles):
rect.set_facecolor(handle.get_facecolor())
#redraw figure when checkbox status has changed
def chkboxcall(label):
for barcontainer, status in zip(barcontainers, check_boxes.get_status()):
[rect.set_visible(status) for rect in barcontainer]
fig.canvas.draw_idle()
#connect widget function
check_boxes.on_clicked(chkboxcall)
plt.show()
#return last status of checkboxes after figure has been closed
return [i for i, status in enumerate(check_boxes.get_status()) if status]
#input lists with their corresponding label names
a = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5,6,6]
b = [5,4,4,3,3,3,2,2,2,2,1,1,1,1,1]
c = [4,5,5,6,6,7,7,7,8,8,8,9,9,10,10]
all_lists = [a, b, c]
all_labels = ["a", "b", "c"]
#bins can be integers or ranges like [0, 2, 6, 7, 10]
bins = 10
idx = select_plots(all_lists, all_labels, bins)
print("The following data are valid:", *[all_labels[i] for i in idx])
Sample output:
>>>The following data are valid: a c

The problem with referring to line examples for histograms is that histograms are actually the composition of multiple lines. This means, when the button is called, you need to loop over the lines to get the expected behavior.
In the following example we see how we loop over each part p of the histogram. We change the transparency depending on its current value. Note: %matplotlib qt is required for jupyter notebook.
import matplotlib.pyplot as plt
from matplotlib.widgets import CheckButtons
%matplotlib qt
### Specify Data And Range
a = [1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
b = [5,4,4,3,3,3,2,2,2,2,1,1,1,1,1]
c = [4,5,5,6,6,7,7,7,8,8,8,9,9,10,10]
x_min, x_max = min(a + b + c), max(a + b + c)
### Create Plots
fig, ax = plt.subplots()
p1 = ax.hist(a, range=(x_min,x_max), bins = 10, alpha=0.3, label="a")[2]
p2 = ax.hist(b, range=(x_min,x_max), bins = 10, alpha=0.3, label="b")[2]
p3 = ax.hist(c, range=(x_min,x_max), bins = 10, alpha=0.3, label="c")[2]
plt.subplots_adjust(left=0.2)
plots = [p1, p2, p3]
# Make Check Buttons
rax = plt.axes([0.05, 0.4, 0.1, 0.15])
labels = ["a", "b", "c"]
check = CheckButtons(rax, labels)
def action(label, default_transparency = 0.3):
index = labels.index(label)
for p in plots[index]:
if p.get_alpha() == default_transparency:
p.set_alpha(0.0)
else:
p.set_alpha(default_transparency)
plt.draw()
### Run Widget
check.on_clicked(action)
plt.legend()
plt.show()

Related

matplotlib.pyplot: How to plot single graph with different Colormaps and a Legend?

I am plotting separate figures for each attribute and label for each data sample. Here is the illustration:
As illustrated in the the last subplot (Label), my data contains seven classes (numerically) (0 to 6). I'd like to visualize these classes using a different fancy colors and a legend. Please note that I just want colors for last subplot. How should I do that?
Here is the code of above plot:
x, y = test_data["x"], test_data["y"]
# determine the total number of plots
n, off = x.shape[1] + 1, 0
plt.rcParams["figure.figsize"] = (40, 15)
# plot all the attributes
for i in range(6):
plt.subplot(n, 1, off + 1)
plt.plot(x[:, off])
plt.title('Attribute:' + str(i), y=0, loc='left')
off += 1
# plot Labels
plt.subplot(n, 1, n)
plt.plot(y)
plt.title('Label', y=0, loc='left')
plt.savefig(save_file_name, bbox_inches="tight")
plt.close()
First, just to set up a similar dataset:
import matplotlib.pyplot as plt
import numpy as np
x = np.random.random((100,6))
y = np.random.randint(0, 6, (100))
fig, axs = plt.subplots(6, figsize=(40,15))
We could use plt.scatter() to give individual points different marker styles:
for i in range(x.shape[-1]):
axs[i].scatter(range(x.shape[0]), x[:,i], c=y)
Or we could mask the arrays we're plotting:
for i in range(x.shape[-1]):
for j in np.unique(y):
axs[i].plot(np.ma.masked_where(y!=j, x[:,i]), 'o')
Either way we get the same results:
Edit: Ah you've edited your question! You can do exactly the same thing for your last plot only, just modify my code above to take it out of the loop of subplots :)
As suggested, we imitate the matplotlib step function by creating a LineCollection to color the different line segments:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection
from matplotlib.patches import Patch
#random data generation
np.random.seed(12345)
number_of_categories=4
y = np.concatenate([np.repeat(np.random.randint(0, number_of_categories), np.random.randint(1, 30)) for _ in range(20)])
#check the results with less points
#y = y[:10]
x = y[None] * np.linspace(1, 5, 3)[:, None]
x += 2 * np.random.random(x.shape) - 1
#your initial plot
num_plots = x.shape[0] + 1
fig, axes = plt.subplots(num_plots, 1, sharex=True, figsize=(10, 8))
for i, ax in enumerate(axes.flat[:-1]):
ax.plot(x[i,:])
#first we create the matplotlib step function with x-values as their midpoint
axes.flat[-1].step(np.arange(y.size), y, where="mid", color="lightgrey", zorder=-1)
#then we plot colored segments with shifted index simulating the step function
shifted_x = np.arange(y.size+1)-0.5
#and identify the step indexes
idx_steps, = np.nonzero(np.diff(y, prepend=np.inf, append=np.inf))
#create collection of plateau segments
colored_segments = np.zeros((idx_steps.size-1, 2, 2))
colored_segments[:, :, 0] = np.vstack((shifted_x[idx_steps[:-1]], shifted_x[idx_steps[1:]])).T
colored_segments[:, :, 1] = np.repeat(y[idx_steps[:-1]], 2).reshape(-1, 2)
#generate discrete color list
n_levels, idx_levels = np.unique(y[idx_steps[:-1]], return_inverse=True)
colorarr = np.asarray(plt.cm.tab10.colors[:n_levels.size])
#and plot the colored segments
lc_cs = LineCollection(colored_segments, colors=colorarr[idx_levels, :], lw=10)
lines_cs = axes.flat[-1].add_collection(lc_cs)
#scaling and legend generation
axes.flat[-1].set_ylim(n_levels.min()-0.5, n_levels.max()+0.5)
axes.flat[-1].legend([Patch(color=colorarr[i, :]) for i, _ in enumerate(n_levels)],
[f"cat {i}" for i in n_levels],
loc="upper center", bbox_to_anchor=(0.5, -0.15),
ncol=n_levels.size)
plt.show()
Sample output:
Alternatively, you can use broken barh plots or color this axis or even all axes using axvspan.

Create 3D Plot (not surface, scatter), where colour depends on z values

I want to create and save a number of sequential plots so I can then make an mp4 movie out of those plots. I want the color of the plot to depend on z (the value of the third axis):
The code I am using:
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator
import numpy as np
file_dir1 = r"C:\Users\files\final_files\B_6_sec\_read.csv"
specs23 = pd.read_csv(file_dir1, sep=',')
choose_file = specs23 # Choose file betwenn specs21, specs22,...
quant = 0 # Choose between 0,1,...,according to the following list
column = ['$\rho$', '$V_{x}$', '$V_{y}$', '$V_{z}$','$B_{x}$', '$B_{y}$','$B_{z}$','$Temperature$']
choose_column = choose_file[column[quant]]
resolution = 1024 # Specify resolution of grid
t_steps = int(len(specs23)/resolution) # Specify number of timesteps
fig, ax = plt.subplots(subplot_kw={"projection": "3d"},figsize=(15,10))
# Make data.
X = np.arange(0, resolution, 1)
Y = np.arange(0, int(len(specs23)/resolution),1)
X, Y = np.meshgrid(X, Y)
Z = choose_file[column[quant]].values
new_z = np.zeros((t_steps,resolution)) # Selected quantity as a function of x,t
### Plot figure ###
for i in range(0,int(len(choose_file)/resolution)):
zs = choose_column[i*resolution:resolution*(i+1)].values
new_z[i] = zs
for i in range(len(X)):
ax.plot(X[i], Y[i], new_z[i]) #%// color binded to "z" values
ax.zaxis.set_major_locator(LinearLocator(10))
# A StrMethodFormatter is used automatically
ax.zaxis.set_major_formatter('{x:.02f}')
plt.show()
What I am getting looks like this:
I would like to look it like this:
I have created the second plot using the LineCollection module. The problem is that it prints all the lines at once not allowing me to save each separately to create a movie.
You can find the dataframe I am using to create the figure here:
https://www.dropbox.com/s/idbeuhyxqfy9xvw/_read.csv?dl=0
The poster wants two things
lines with colors depending on z-values
animation of the lines over time
In order to achieve(1) one needs to cut up each line in separate segments and assign a color to each segment; in order to obtain a colorbar, we need to create a scalarmappable object that knows about the outer limits of the colors.
For achieving 2, one needs to either (a) save each frame of the animation and combine it after storing all the frames, or (b) leverage the animation module in matplotlib. I have used the latter in the example below and achieved the following:
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt, numpy as np
from mpl_toolkits.mplot3d.art3d import Line3DCollection
fig, ax = plt.subplots(subplot_kw = dict(projection = '3d'))
# generate data
x = np.linspace(-5, 5, 500)
y = np.linspace(-5, 5, 500)
z = np.exp(-(x - 2)**2)
# uggly
segs = np.array([[(x1,y2), (x2, y2), (z1, z2)] for x1, x2, y1, y2, z1, z2 in zip(x[:-1], x[1:], y[:-1], y[1:], z[:-1], z[1:])])
segs = np.moveaxis(segs, 1, 2)
# setup segments
# get bounds
bounds_min = segs.reshape(-1, 3).min(0)
bounds_max = segs.reshape(-1, 3).max(0)
# setup colorbar stuff
# get bounds of colors
norm = plt.cm.colors.Normalize(bounds_min[2], bounds_max[2])
cmap = plt.cm.plasma
# setup scalar mappable for colorbar
sm = plt.cm.ScalarMappable(norm, plt.cm.plasma)
# get average of segment
avg = segs.mean(1)[..., -1]
# get colors
colors = cmap(norm(avg))
# generate colors
lc = Line3DCollection(segs, norm = norm, cmap = cmap, colors = colors)
ax.add_collection(lc)
def update(idx):
segs[..., -1] = np.roll(segs[..., -1], idx)
lc.set_offsets(segs)
return lc
ax.set_xlim(bounds_min[0], bounds_max[0])
ax.set_ylim(bounds_min[1], bounds_max[1])
ax.set_zlim(bounds_min[2], bounds_max[2])
fig.colorbar(sm)
from matplotlib import animation
frames = np.linspace(0, 30, 10, 0).astype(int)
ani = animation.FuncAnimation(fig, update, frames = frames)
ani.save("./test_roll.gif", savefig_kwargs = dict(transparent = False))
fig.show()

How to create grid plot with inner subplots?

I have configured subplots of (5 x 1) format shown in Fig. 1 as given by Figure block A in the MWE. I am trying to repeat them n times such that they appear in a grid format with number of rows and columns given by the function fitPlots as mentioned here; to give output as shown in Fig. 2.
Fig. 1 Initial plot
Fig. 2 Repeated plot (desired output)
What would be the best way to repeat the code block to create a grid plot with inner subplots? The MWE creates multiple pages, I want all of them on a single page.
MWE
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import numpy as np
import math
x = np.arange(1, 100, 0.2)
y_a = np.sqrt(x)
y_b = np.sin(x)
y_c = np.sin(x)
y_d = np.cos(x) * np.cos(x)
y_e = 1/x
########## Figure block A #####################
with PdfPages('./plot_grid.pdf') as plot_grid_loop:
fig, (a, b, c, d, e) = plt.subplots(5, 1, sharex=True, gridspec_kw={'height_ratios': [5, 1, 1, 1, 1]})
a.plot(x, y_a)
b.plot(x, y_b)
c.plot(x, y_c)
d.plot(x, y_d)
e.plot(x, y_e)
plot_grid_loop.savefig()
plt.close
########## Figure block A #####################
# from https://stackoverflow.com/a/43366784/4576447
def fitPlots(N, aspect=(16,9)):
width = aspect[0]
height = aspect[1]
area = width*height*1.0
factor = (N/area)**(1/2.0)
cols = math.floor(width*factor)
rows = math.floor(height*factor)
rowFirst = width < height
while rows*cols < N:
if rowFirst:
rows += 1
else:
cols += 1
rowFirst = not(rowFirst)
return rows, cols
n_plots = 15
n_rows, n_cols = fitPlots(n_plots)
with PdfPages('./plot_grid.pdf') as plot_grid_loop:
for m in range(1, n_plots+1):
fig, (a, b, c, d, e) = plt.subplots(5, 1, sharex=True, gridspec_kw={'height_ratios': [5, 1, 1, 1, 1]})
a.plot(x, y_a)
b.plot(x, y_b)
c.plot(x, y_c)
d.plot(x, y_d)
e.plot(x, y_e)
plot_grid_loop.savefig()
plt.close
This can be done by generating a GridSpec object with gs_fig = fig.add_gridspec() that contains enough rows and columns to fit the five figure blocks (note that when you use plt.subplots a GridSpec is also generated and can be accessed with ax.get_gridspec()). Each empty slot in the GridSpec can then be filled with a sub-GridSpec with gs_sub = gs_fig[i].subgridspec() to hold the five subplots. The trickier part is sharing the x-axis. This can be done by generating an empty first Axes with which the x-axis of all the subplots can be shared.
The following example illustrates this with only three figure blocks, based on the code sample you have shared but with some differences regarding the figure design: the number of rows is computed based on the chosen number of columns, and the figure dimensions are set based on a chosen figure width and aspect ratio. The code for saving the figure to a pdf file is not included.
import numpy as np # v 1.19.2
import matplotlib.pyplot as plt # v 3.3.4
# Create variables to plot
x = np.arange(1, 100, 0.2)
y_a = np.sqrt(x)
y_b = np.sin(x)
y_c = np.sin(x)
y_d = np.cos(x)*np.cos(x)
y_e = 1/x
# Set parameters for figure dimensions
nplots = 3 # random number of plots for this example
ncols = 2
nrows = int(np.ceil(nplots/ncols))
subp_w = 10/ncols # 10 is the total figure width in inches
subp_h = 1*subp_w # set subplot aspect ratio
# Create figure containing GridSpec object with appropriate dimensions
fig = plt.figure(figsize=(ncols*subp_w, nrows*subp_h))
gs_fig = fig.add_gridspec(nrows, ncols)
# Loop through GridSpec to add sub-GridSpec for each figure block
heights = [5, 1, 1, 1, 1]
for i in range(nplots):
gs_sub = gs_fig[i].subgridspec(len(heights), 1, height_ratios=heights, hspace=0.2)
ax = fig.add_subplot(gs_sub[0, 0]) # generate first empty Axes to enable sharex
ax.axis('off') # remove x and y axes because it is overwritten in the loop below
# Loop through y variables to plot all the subplots with shared x-axis
for j, y in enumerate([y_a, y_b, y_c, y_d, y_e]):
ax = fig.add_subplot(gs_sub[j, 0], sharex=ax)
ax.plot(x, y)
if not ax.is_last_row():
ax.tick_params(labelbottom=False)
Reference: matplotlib tutorial GridSpec using SubplotSpec

Append data with different colour in matplotlib in real time

I'm updating dynamically a plot in a loop:
dat=[0, max(X[:, 0])]
fig = plt.figure()
ax = fig.add_subplot(111)
Ln, = ax.plot(dat)
Ln2, = ax.plot(dat)
plt.ion()
plt.show()
for i in range(1, 40):
ax.set_xlim(int(len(X[:i])*0.8), len(X[:i])) #show last 20% data of X
Ln.set_ydata(X[:i])
Ln.set_xdata(range(len(X[:i])))
Ln2.set_ydata(Y[:i])
Ln2.set_xdata(range(len(Y[:i])))
plt.pause(0.1)
But now I want to update it in a different way: append some values and show them in other colour:
X.append(other_data)
# change colour just to other_data in X
The result should look something like this:
How could I do that?
Have a look at the link I posted. Linesegments can be used to plot colors at a particular location differently. If you want to do it in real-time you can still use line-segments. I leave that up to you.
# adjust from https://stackoverflow.com/questions/38051922/how-to-get-differents-colors-in-a-single-line-in-a-matplotlib-figure
import numpy as np, matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm
# my func
x = np.linspace(-2 * np.pi, 2 * np.pi, 100)
y = 3000 * np.sin(x)
# select how to color
cmap = ListedColormap(['r','b'])
norm = BoundaryNorm([2000,], cmap.N)
# get segments
xy = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.hstack([xy[:-1], xy[1:]])
# control which values have which colors
n = y.shape[0]
c = np.array([plt.cm.RdBu(0) if i < n//2 else plt.cm.RdBu(255) for i in range(n)])
# c = plt.cm.Reds(np.arange(0, n))
# make line collection
lc = LineCollection(segments,
colors = c
# norm = norm,
)
# plot
fig, ax = plt.subplots()
ax.add_collection(lc)
ax.autoscale()
ax.axvline(x[n//2], linestyle = 'dashed')
ax.annotate("Half-point", (x[n//2], y[n//2]), xytext = (4, 1000),
arrowprops = dict(headwidth = 30))
fig.show()

Automatically place the labels of a scatter plot around the points so that they don't overwrite each other [duplicate]

I use matplotlib to plot a scatter chart:
And label the bubble using a transparent box according to the tip at How to annotate point on a scatter automatically placed arrow
Here is the code:
if show_annote:
for i in range(len(x)):
annote_text = annotes[i][0][0] # STK_ID
ax.annotate(annote_text, xy=(x[i], y[i]), xytext=(-10,3),
textcoords='offset points', ha='center', va='bottom',
bbox=dict(boxstyle='round,pad=0.2', fc='yellow', alpha=0.2),
fontproperties=ANNOTE_FONT)
and the resulting plot:
But there is still room for improvement to reduce overlap (for instance the label box offset is fixed as (-10,3)). Are there algorithms that can:
dynamically change the offset of label box according to the crowdedness of its neighbourhood
dynamically place the label box remotely and add an arrow line beween bubble and label box
somewhat change the label orientation
label_box overlapping bubble is better than label_box overlapping label_box?
I just want to make the chart easy for human eyes to comprehand, so some overlap is OK, not as rigid a constraint as http://en.wikipedia.org/wiki/Automatic_label_placement suggests. And the bubble quantity within the chart is less than 150 most of the time.
I find the so called Force-based label placement http://bl.ocks.org/MoritzStefaner/1377729 is quite interesting. I don't know if there is any python code/package available to implement the algorithm.
I am not an academic guy and not looking for an optimum solution, and my python codes need to label many many charts, so the the speed/memory is in the scope of consideration.
I am looking for a quick and effective solution. Any help (code,algorithm,tips,thoughts) on this subject? Thanks.
The following builds on tcaswell's answer.
Networkx layout methods such as nx.spring_layout rescale the positions so that they all fit in a unit square (by default). Even the position of the fixed data_nodes are rescaled. So, to apply the pos to the original scatter_data, an unshifting and unscaling must be performed.
Note also that nx.spring_layout has a k parameter which controls the optimal distance between nodes. As k increases, so does the distance of the annotations from the data points.
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
np.random.seed(2016)
N = 20
scatter_data = np.random.rand(N, 3)*10
def repel_labels(ax, x, y, labels, k=0.01):
G = nx.DiGraph()
data_nodes = []
init_pos = {}
for xi, yi, label in zip(x, y, labels):
data_str = 'data_{0}'.format(label)
G.add_node(data_str)
G.add_node(label)
G.add_edge(label, data_str)
data_nodes.append(data_str)
init_pos[data_str] = (xi, yi)
init_pos[label] = (xi, yi)
pos = nx.spring_layout(G, pos=init_pos, fixed=data_nodes, k=k)
# undo spring_layout's rescaling
pos_after = np.vstack([pos[d] for d in data_nodes])
pos_before = np.vstack([init_pos[d] for d in data_nodes])
scale, shift_x = np.polyfit(pos_after[:,0], pos_before[:,0], 1)
scale, shift_y = np.polyfit(pos_after[:,1], pos_before[:,1], 1)
shift = np.array([shift_x, shift_y])
for key, val in pos.items():
pos[key] = (val*scale) + shift
for label, data_str in G.edges():
ax.annotate(label,
xy=pos[data_str], xycoords='data',
xytext=pos[label], textcoords='data',
arrowprops=dict(arrowstyle="->",
shrinkA=0, shrinkB=0,
connectionstyle="arc3",
color='red'), )
# expand limits
all_pos = np.vstack(pos.values())
x_span, y_span = np.ptp(all_pos, axis=0)
mins = np.min(all_pos-x_span*0.15, 0)
maxs = np.max(all_pos+y_span*0.15, 0)
ax.set_xlim([mins[0], maxs[0]])
ax.set_ylim([mins[1], maxs[1]])
fig, ax = plt.subplots()
ax.scatter(scatter_data[:, 0], scatter_data[:, 1],
c=scatter_data[:, 2], s=scatter_data[:, 2] * 150)
labels = ['ano_{}'.format(i) for i in range(N)]
repel_labels(ax, scatter_data[:, 0], scatter_data[:, 1], labels, k=0.008)
plt.show()
with k=0.011 yields
and with k=0.008 yields
Another option using my library adjustText, written specially for this purpose (https://github.com/Phlya/adjustText).
from adjustText import adjust_text
np.random.seed(2016)
N = 50
scatter_data = np.random.rand(N, 3)
fig, ax = plt.subplots()
ax.scatter(scatter_data[:, 0], scatter_data[:, 1],
c=scatter_data[:, 2], s=scatter_data[:, 2] * 150)
labels = ['ano_{}'.format(i) for i in range(N)]
texts = []
for x, y, text in zip(scatter_data[:, 0], scatter_data[:, 1], labels):
texts.append(ax.text(x, y, text))
plt.show()
np.random.seed(2016)
N = 50
scatter_data = np.random.rand(N, 3)
fig, ax = plt.subplots()
ax.scatter(scatter_data[:, 0], scatter_data[:, 1],
c=scatter_data[:, 2], s=scatter_data[:, 2] * 150)
labels = ['ano_{}'.format(i) for i in range(N)]
texts = []
for x, y, text in zip(scatter_data[:, 0], scatter_data[:, 1], labels):
texts.append(ax.text(x, y, text))
adjust_text(texts, force_text=0.05, arrowprops=dict(arrowstyle="-|>",
color='r', alpha=0.5))
plt.show()
It doesn't repel from the bubbles, only from their centers and other texts.
It is a little rough around the edges (I can't quite figure out how to scale the relative strengths of the spring network vs the repulsive force, and the bounding box is a bit screwed up), but this is a decent start:
import networkx as nx
N = 15
scatter_data = rand(3, N)
G=nx.Graph()
data_nodes = []
init_pos = {}
for j, b in enumerate(scatter_data.T):
x, y, _ = b
data_str = 'data_{0}'.format(j)
ano_str = 'ano_{0}'.format(j)
G.add_node(data_str)
G.add_node(ano_str)
G.add_edge(data_str, ano_str)
data_nodes.append(data_str)
init_pos[data_str] = (x, y)
init_pos[ano_str] = (x, y)
pos = nx.spring_layout(G, pos=init_pos, fixed=data_nodes)
ax = gca()
ax.scatter(scatter_data[0], scatter_data[1], c=scatter_data[2], s=scatter_data[2]*150)
for j in range(N):
data_str = 'data_{0}'.format(j)
ano_str = 'ano_{0}'.format(j)
ax.annotate(ano_str,
xy=pos[data_str], xycoords='data',
xytext=pos[ano_str], textcoords='data',
arrowprops=dict(arrowstyle="->",
connectionstyle="arc3"))
all_pos = np.vstack(pos.values())
mins = np.min(all_pos, 0)
maxs = np.max(all_pos, 0)
ax.set_xlim([mins[0], maxs[0]])
ax.set_ylim([mins[1], maxs[1]])
draw()
How well it works depends a bit on how your data is clustered.
We can use plotly for this. But we can't help placing overlap correctly if there is lot of data. Instead we can zoom in and zoom out.
import plotly.express as px
df = px.data.tips()
df = px.data.gapminder().query("year==2007 and continent=='Americas'")
fig = px.scatter(df, x="gdpPercap", y="lifeExp", text="country", log_x=True, size_max=100, color="lifeExp",
title="Life Expectency")
fig.update_traces(textposition='top center')
fig.show()
Output:
Just created another quick solution that is also very fast: textalloc
In this case you could do something like this:
import textalloc as ta
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(2022)
N = 30
scatter_data = np.random.rand(N, 3)*10
fig, ax = plt.subplots()
ax.scatter(scatter_data[:, 0], scatter_data[:, 1], c=scatter_data[:, 2], s=scatter_data[:, 2] * 50, zorder=10,alpha=0.5)
labels = ['ano-{}'.format(i) for i in range(N)]
text_list = labels = ['ano-{}'.format(i) for i in range(N)]
ta.allocate_text(fig,ax,scatter_data[:, 0],scatter_data[:, 1],
text_list,
x_scatter=scatter_data[:, 0], y_scatter=scatter_data[:, 1],
max_distance=0.2,
min_distance=0.04,
margin=0.039,
linewidth=0.5,
nbr_candidates=400)
plt.show()

Categories