Synchronizing twinx y-axis based on two lines - python

My end goal is to create a graph that quickly communicates that two data points are between their respective bounds. I could instead of having this information on one graph, create two separate graphs; the chart the data and illustrate the bounds with horizontal lines. If I could have it so that this basic function is done with one graph, it would be much more elegant.
Is there some method I can use to sync the two y-axes so that a certain value A1 on y-axis 1 and A2 on y-axis 2 appear on the same place vertically within the graph, while at the same time, ensuring that another certain value B1 on y-axis 1 and B2 on y-axis 2appears on a separate distinct place vertically within the graph?
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
### Generate linear data
Temp = 20
pH = 6
DataCopy = pd.DataFrame({'Temp': [], 'pH': [], 'Time': []})
for i in range(10):
DataTime = datetime.datetime.now()
DataCopy = DataCopy.append({'Temp': Temp, 'pH': pH, 'Time': DataTime}, ignore_index=True)
Temp += (-0.5)
pH += (0.2)
### Plot data unto graph w/ double y-axis
sns.lineplot(data=DataCopy, x='Time', y='pH', color = 'red', label = 'Temp')
ax2 = plt.twinx()
sns.lineplot(data=DataCopy, x='Time', y='Temp', color = 'blue', label = 'pH', ax=ax2)
plt.legend()
plt.show()
How this implementation would look if done on separate graphs:
Desired effect:

You seem to want to align two positions on the left axis with two positions on the right axis.
The following approach measures the distance factor of the lower limit vs the two lines, for both axes. Then it applies the largest factor to the axis with the lowest factor. The analog happens for the upper limit of the axes.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
np.random.seed(2021)
ph_low, ph_high = 7, 9
temp_low, temp_high = 12, 18
data_copy = pd.DataFrame({'Temp': np.random.normal(0.04, 0.4, 100).cumsum() + 10,
'pH': np.random.normal(0.02, 0.2, 100).cumsum() + 6,
'Time': pd.date_range('20211211 08:00:00', freq='1min', periods=100)})
plt.figure(figsize=(12, 5))
ax1 = sns.lineplot(data=data_copy, x='Time', y='pH', color='red', label='Temp')
ax1.axhline(ph_low, color='red', ls=(0, (5, 5, 0)))
ax1.axhline(ph_high, color='red', ls=(0, (5, 5, 0)))
ax2 = ax1.twinx()
sns.lineplot(data=data_copy, x='Time', y='Temp', color='blue', label='pH', ax=ax2)
ax2.axhline(temp_low, color='blue', ls=(0, (0, 5, 5)))
ax2.axhline(temp_high, color='blue', ls=(0, (0, 5, 5)))
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax1.legend_.remove()
ax2.legend(handles=handles1 + handles2, labels=labels1 + labels2)
ymin1, ymax1 = ax1.get_ylim()
ymin2, ymax2 = ax2.get_ylim()
fymin1 = (ph_low - ymin1) / (ph_high - ph_low)
fymin2 = (temp_low - ymin2) / (temp_high - temp_low)
if fymin1 < fymin2: # move ymin1 using fymin2
ymin1 = ph_low - fymin2 * (ph_high - ph_low)
else: # move ymin2 using fymin1
ymin2 = temp_low - fymin1 * (temp_high - temp_low)
fymax1 = (ymax1 - ph_high) / (ph_high - ph_low)
fymax2 = (ymax2 - temp_high) / (temp_high - temp_low)
if fymax1 < fymax2: # move ymax1 using fymax2
ymax1 = ph_high + fymax2 * (ph_high - ph_low)
else: # move ymax2 using fymax1
ymax2 = temp_high + fymax1 * (temp_high - temp_low)
ax1.set_ylim(ymin1, ymax1)
ax2.set_ylim(ymin2, ymax2)
plt.show()

Related

Matplotlib stacked histogram label

Here is my picture. I need to make label for those bars however every upper layer contains lower layer - so the label should containt grouped colors, i.e. blue - dataset 1, blue/orange - dataset 2, blue/orange/green - dataset 3 and finally blue/orange/green/purple - dataset 4. Is it plausible to make it? Thank you.
enter image description here
binwidth = 1
n, bins, patches = ax1.hist(C, bins=range(81, 105, binwidth),
density=False, histtype='barstacked' ,
edgecolor='gray',
color=barvy_histogram,linewidth=0.3)
hatches = ['//','x','..','oo']
for patch_set, hatch in zip(patches, hatches):
for patch in patch_set.patches:
patch.set_hatch(hatch)
patch.set_linewidth=0.1
patch.set_color='gray'
mpl.rcParams['hatch.linewidth'] = 0.5
The following approach uses the tuple legend handler (HandlerTuple) to combine the legend handles. It produces a horizontal layout, while maybe a vertical stacking would be more interesting.
The code starts with creating some test data, supposing C is an Nx4 array of integers. The bin edges are set at halves to make sure that floating point accuracy wouldn't place values in the wrong bin.
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.legend_handler import HandlerTuple
import numpy as np
# first, create some test data
C = (np.random.normal(0.001, 1, (100, 20)).cumsum(axis=0) * 1.2 + 90).astype(int).reshape(-1, 4)
c_min = C.min()
c_max = C.max()
mpl.rcParams['hatch.linewidth'] = 0.5
fig, ax1 = plt.subplots(figsize=(12, 5))
binwidth = 1
colors = plt.cm.Set2.colors[:C.shape[1]]
_, _, patches = ax1.hist(C, bins=np.arange(c_min - 0.5, c_max + binwidth, binwidth),
density=False, histtype='barstacked',
edgecolor='gray', color=colors, linewidth=0.3,
label=[f'N={p}' for p in range(25, 101, 25)])
hatches = ['//', 'x', '..', 'oo']
for patch_set, hatch in zip(patches, hatches):
for patch in patch_set.patches:
patch.set_hatch(hatch)
patch.set_linewidth = 0.1
handles, labels = ax1.get_legend_handles_labels()
ax1.legend(handles=[tuple(handles[:i + 1]) for i in range(C.shape[1])], labels=labels,
handlelength=6, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)})
plt.show()

matplotlib.pyplot: How to plot single graph with different Colormaps and a Legend?

I am plotting separate figures for each attribute and label for each data sample. Here is the illustration:
As illustrated in the the last subplot (Label), my data contains seven classes (numerically) (0 to 6). I'd like to visualize these classes using a different fancy colors and a legend. Please note that I just want colors for last subplot. How should I do that?
Here is the code of above plot:
x, y = test_data["x"], test_data["y"]
# determine the total number of plots
n, off = x.shape[1] + 1, 0
plt.rcParams["figure.figsize"] = (40, 15)
# plot all the attributes
for i in range(6):
plt.subplot(n, 1, off + 1)
plt.plot(x[:, off])
plt.title('Attribute:' + str(i), y=0, loc='left')
off += 1
# plot Labels
plt.subplot(n, 1, n)
plt.plot(y)
plt.title('Label', y=0, loc='left')
plt.savefig(save_file_name, bbox_inches="tight")
plt.close()
First, just to set up a similar dataset:
import matplotlib.pyplot as plt
import numpy as np
x = np.random.random((100,6))
y = np.random.randint(0, 6, (100))
fig, axs = plt.subplots(6, figsize=(40,15))
We could use plt.scatter() to give individual points different marker styles:
for i in range(x.shape[-1]):
axs[i].scatter(range(x.shape[0]), x[:,i], c=y)
Or we could mask the arrays we're plotting:
for i in range(x.shape[-1]):
for j in np.unique(y):
axs[i].plot(np.ma.masked_where(y!=j, x[:,i]), 'o')
Either way we get the same results:
Edit: Ah you've edited your question! You can do exactly the same thing for your last plot only, just modify my code above to take it out of the loop of subplots :)
As suggested, we imitate the matplotlib step function by creating a LineCollection to color the different line segments:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import LineCollection
from matplotlib.patches import Patch
#random data generation
np.random.seed(12345)
number_of_categories=4
y = np.concatenate([np.repeat(np.random.randint(0, number_of_categories), np.random.randint(1, 30)) for _ in range(20)])
#check the results with less points
#y = y[:10]
x = y[None] * np.linspace(1, 5, 3)[:, None]
x += 2 * np.random.random(x.shape) - 1
#your initial plot
num_plots = x.shape[0] + 1
fig, axes = plt.subplots(num_plots, 1, sharex=True, figsize=(10, 8))
for i, ax in enumerate(axes.flat[:-1]):
ax.plot(x[i,:])
#first we create the matplotlib step function with x-values as their midpoint
axes.flat[-1].step(np.arange(y.size), y, where="mid", color="lightgrey", zorder=-1)
#then we plot colored segments with shifted index simulating the step function
shifted_x = np.arange(y.size+1)-0.5
#and identify the step indexes
idx_steps, = np.nonzero(np.diff(y, prepend=np.inf, append=np.inf))
#create collection of plateau segments
colored_segments = np.zeros((idx_steps.size-1, 2, 2))
colored_segments[:, :, 0] = np.vstack((shifted_x[idx_steps[:-1]], shifted_x[idx_steps[1:]])).T
colored_segments[:, :, 1] = np.repeat(y[idx_steps[:-1]], 2).reshape(-1, 2)
#generate discrete color list
n_levels, idx_levels = np.unique(y[idx_steps[:-1]], return_inverse=True)
colorarr = np.asarray(plt.cm.tab10.colors[:n_levels.size])
#and plot the colored segments
lc_cs = LineCollection(colored_segments, colors=colorarr[idx_levels, :], lw=10)
lines_cs = axes.flat[-1].add_collection(lc_cs)
#scaling and legend generation
axes.flat[-1].set_ylim(n_levels.min()-0.5, n_levels.max()+0.5)
axes.flat[-1].legend([Patch(color=colorarr[i, :]) for i, _ in enumerate(n_levels)],
[f"cat {i}" for i in n_levels],
loc="upper center", bbox_to_anchor=(0.5, -0.15),
ncol=n_levels.size)
plt.show()
Sample output:
Alternatively, you can use broken barh plots or color this axis or even all axes using axvspan.

Seaborn swarmplot break into lines

I'm trying to make this swarmplot with seaborn
My problem is that the swarms are too wide. I want to be able to break them up into rows of maximum 3 dots per row
This is my code:
# Import modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
###
# Import and clead dataset
url = "https://raw.githubusercontent.com/amirnakar/scratchboard/master/Goodreads/goodreads_library_export.csv"
Books = pd.read_csv(url)
Books = Books[Books['Date Read'].notna()] # Remove NA
Books['Year'] = pd.to_datetime( # Convert to dates
Books['Date Read'],
format='%YYYY%mm%dd',
errors='coerce')
Books['Year'] = pd.DatetimeIndex(Books['Date Read']).year # Take only years
Books[['Year', 'Date Read']] # merge the years in
###
# Calculate mean rate by year
RateMeans = (Books["My Rating"].groupby(Books["Year"]).mean())
Years = list(RateMeans.index.values)
Rates = list(RateMeans)
RateMeans = pd.DataFrame(
{'Years': Years,
'Rates': Rates
})
###
# Plot
fig,ax = plt.subplots(figsize=(20,10))
## Violin Plot:
plot = sns.violinplot(
data=Books,
x = "Year",
y = 'My Rating',
ax=ax,
color = "white",
inner=None,
#palette=colors_from_values(ArrayRates[:,1], "Blues")
)
## Swarm Plot
plot = sns.swarmplot(
data=Books,
x = "Year",
y = 'My Rating',
ax=ax,
hue = "My Rating",
size = 10
)
## Style
### Title
ax.text(x=0.5, y=1.1, s='Book Ratings: Distribution per Year', fontsize=32, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
ax.text(x=0.5, y=1.05, s='Source: Goodreads.com (amirnakar)', fontsize=24, alpha=0.75, ha='center', va='bottom', transform=ax.transAxes)
### Axis
ax.set(xlim=(4.5, None), ylim=(0,6))
#ax.set_title('Book Ratings: Distribution per Year \n', fontsize = 32)
ax.set_ylabel('Rating (out of 5 stars)', fontsize = 24)
ax.set_xlabel('Year', fontsize = 24)
ax.set_yticklabels(ax.get_yticks().astype(int), size=20)
ax.set_xticklabels(ax.get_xticks(), size=20)
### Legend
plot.legend(loc="lower center", ncol = 5 )
### Colour pallete
colorset = ["#FAFF04", "#FFD500", "#9BFF00", "#0099FF", "#000BFF"]
colorset.reverse()
sns.set_palette(sns.color_palette(colorset))
# Save the plot
#plt.show(plot)
plt.savefig("Rate-Python.svg", format="svg")
This is the output:
What I want to have happen:
I want to be able to define that each row of dots should have a maximum of 3, if it's more, than break it into a new row. I demonstrate it here (done manually in PowerPoint) on two groups, but I want it for the entire plot
BEFORE:
AFTER:
Here is an attempt to relocate the dots a bit upward/downward. The value for delta comes from experimenting.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Import and clea dataset
url = "https://raw.githubusercontent.com/amirnakar/scratchboard/master/Goodreads/goodreads_library_export.csv"
Books = pd.read_csv(url)
Books = Books[Books['Date Read'].notna()] # Remove NA
Books['Year'] = pd.DatetimeIndex(Books['Date Read']).year # Take only years
# Calculate mean rate by year
RatePerYear = Books[["My Rating", "Year"]].groupby("Year")["My Rating"].value_counts()
modified_ratings = []
delta = 0.2 # distance to move overlapping ratings
for (year, rating), count in RatePerYear.iteritems():
higher = max(0, ((count - 3) + 1) // 2)
lower = max(0, (count - 3) // 2)
modified_ratings.append([year, rating, count - higher - lower])
for k in range((higher + 2) // 3):
modified_ratings.append([year, rating + (k + 1) * delta, 3 if (k + 1) * 3 <= higher else higher % 3])
for k in range((lower + 2) // 3):
modified_ratings.append([year, rating - (k + 1) * delta, 3 if (k + 1) * 3 <= lower else lower % 3])
modified_ratings = np.array(modified_ratings)
modified_ratings_df = pd.DataFrame(
{'Year': np.repeat(modified_ratings[:, 0].astype(int), modified_ratings[:, 2].astype(int)),
'My Rating': np.repeat(modified_ratings[:, 1], modified_ratings[:, 2].astype(int))})
modified_ratings_df['Rating'] = modified_ratings_df['My Rating'].round().astype(int)
fig, ax = plt.subplots(figsize=(20, 10))
sns.violinplot(data=Books, x="Year", y='My Rating', ax=ax, color="white", inner=None)
palette = ["#FAFF04", "#FFD500", "#9BFF00", "#0099FF", "#000BFF"].reverse()
sns.swarmplot(data=modified_ratings_df, x="Year", y='My Rating', ax=ax, hue="Rating", size=10, palette=palette)
ax.set(xlim=(4.5, None), ylim=(0, 6))
ax.legend(loc="lower center", ncol=5)
plt.tight_layout()
plt.show()

Visualizing the difference between two numeric arrays

I have two numeric arrays of equal length, with one array always having the element value >= to the corresponding (same index) element in the second array.
I am trying to visualize in a single graph:
i) difference between the corresponding elements,
ii) values of the corresponding elements in the two arrays.
I have tried plotting the CDF as below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
arr1 = np.random.uniform(1,20,[25,1])
arr2 = arr1 + np.random.uniform(1,10,[25,1])
df1 = pd.DataFrame(arr1)
df2 = pd.DataFrame(arr2)
fix, ax = plt.subplots()
sns.kdeplot(df1[0], cumulative=True, color='orange', label='arr1')
sns.kdeplot(df2[0], cumulative=True, color='b', label='arr2')
sns.kdeplot(df2[0]-df1[0], cumulative=True, color='r', label='difference')
plt.show()
which gives the following output:
However, it does not capture the difference, and values of the corresponding elements together. For example, suppose the difference between two elements is 3. The two numbers can be 2 and 5, but they can also be 15 and 18, and this can not be determined from the CDF.
Which kind of plotting can visualize both the difference between the elements and the values of the elements?
I do not wish to line plot as below because not much statistical insights can be derived from the visualization.
ax.plot(df1[0])
ax.plot(df2[0])
ax.plot(df2[0]-df1[0])
There are lots of ways to show difference between two values. It really depends on your goal for the chart, how quantitative or qualitative you want to be, or if you want to show the raw data somehow. Here are a few ideas that come to mind that do not involve simple line plots or density functions. I strongly recommend the book Better Data Visualization by Johnathan Schwabish. He discusses interesting considerations regarding data presentation.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
arr1 = np.random.uniform(1,20, size=25)
arr2 = arr1 + np.random.uniform(1,10, size=25)
df = pd.DataFrame({
'col1' : arr1,
'col2' : arr2
})
df['diff'] = df.col2 - df.col1
df['sum'] = df.col1 + df.col2
fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(15,15))
axes = axes.flatten()
# Pyramid chart
df_sorted = df.sort_values(by='sum', ascending=True)
axes[0].barh(
y = np.arange(1,26),
width = -df_sorted.col1
)
axes[0].barh(
y = np.arange(1,26),
width = df_sorted.col2
)
# Style axes[0]
style_func(axes[0], 'Pyramid Chart')
# Dot Plot
axes[1].scatter(df.col1, np.arange(1, 26), label='col1')
axes[1].scatter(df.col2, np.arange(1, 26), label='col2')
axes[1].hlines(
y = np.arange(1, 26),
xmin = df.col1, xmax = df.col2,
zorder=0, linewidth=1.5, color='k'
)
# Style axes[1]
legend = axes[1].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[1], 'Dot Plot')
set_xlim = axes[1].set_xlim(0,25)
# Dot Plot 2
df_sorted = df.sort_values(by=['col1', 'diff'], ascending=False)
axes[2].scatter(df_sorted.col1, np.arange(1, 26), label='col1')
axes[2].scatter(df_sorted.col2, np.arange(1, 26), label='col2')
axes[2].hlines(
y = np.arange(1, 26),
xmin = df_sorted.col1, xmax = df_sorted.col2,
zorder=0, linewidth=1.5, color='k'
)
# Style axes[2]
legend = axes[2].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[2], 'Dot Plot')
set_xlim = axes[2].set_xlim(0,25)
# Dot Plot 3
df_sorted = df.sort_values(by='sum', ascending=True)
axes[3].scatter(-df_sorted.col1, np.arange(1, 26), label='col1')
axes[3].scatter(df_sorted.col2, np.arange(1, 26), label='col2')
axes[3].vlines(x=0, ymin=-1, ymax=27, linewidth=2.5, color='k')
axes[3].hlines(
y = np.arange(1, 26),
xmin = -df_sorted.col1, xmax = df_sorted.col2,
zorder=0, linewidth=2
)
# Style axes[3]
legend = axes[3].legend(ncol=2, loc='center', bbox_to_anchor=(0.14,1.025), edgecolor='w')
style_func(axes[3], 'Dot Plot')
# Strip plot
axes[4].scatter(df.col1, [4] * 25)
axes[4].scatter(df.col2, [6] * 25)
axes[4].set_ylim(0, 10)
axes[4].vlines(
x = [df.col1.mean(), df.col2.mean()],
ymin = [3.5, 5.5], ymax=[4.5,6.5],
color='black', linewidth =2
)
# Style axes[4]
axes[4].yaxis.set_major_locator(ticker.FixedLocator([4,6]))
axes[4].yaxis.set_major_formatter(ticker.FixedFormatter(['col1','col2']))
hide_spines = [axes[4].spines[x].set_visible(False) for x in ['left','top','right']]
set_title = axes[4].set_title('Strip Plot', fontweight='bold')
tick_params = axes[4].tick_params(axis='y', left=False)
grid = axes[4].grid(axis='y', dashes=(8,3), alpha=0.3, color='gray')
# Slope chart
for i in range(25):
axes[5].plot([0,1], [df.col1[i], df.col2[i]], color='k')
align = ['left', 'right']
for i in range(1,3):
axes[5].text(x = i - 1, y = 0, s = 'col' + str(i),
fontsize=14, fontweight='bold', ha=align[i-1])
set_title = axes[5].set_title('Slope chart', fontweight='bold')
axes[5].axis('off')
def style_func(ax, title):
hide_spines = [ax.spines[x].set_visible(False) for x in ['left','top','right']]
set_title = ax.set_title(title, fontweight='bold')
set_xlim = ax.set_xlim(-25,25)
x_locator = ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
y_locator = ax.yaxis.set_major_locator(ticker.FixedLocator(np.arange(1,26, 2)))
spine_width = ax.spines['bottom'].set_linewidth(1.5)
x_tick_params = ax.tick_params(axis='x', length=8, width=1.5)
x_tick_params = ax.tick_params(axis='y', left=False)
What about a parallel coordinates plot with plotly? This will allow to see the distinct values of each original array but then also if they converge on the same diffrence?
https://plot.ly/python/parallel-coordinates-plot/

Add colorbar labels as text on scatter plot

I have a scatter plot generated using:
x = list(auto['umap1'])
y = list(auto['umap2'])
final_df2 = pd.DataFrame(list(zip(x,y,communities)), columns =['x', 'y', 'cluster'])
no_clusters = max(communities)
cluster_list = list(range (min(communities), no_clusters+1))
fig2, ax = plt.subplots(figsize = (20,15))
plt.scatter(x,y, c=final_df2['cluster'], cmap=plt.cm.get_cmap('hsv', max(cluster_list)), s = 0.5)
plt.title('Phenograph on UMAP - All Markers (auto)', fontsize=15)
plt.xlabel('umap_1', fontsize=15)
plt.ylabel('umap_2', fontsize=15)
plt.colorbar(extend='both',ticks = range(max(cluster_list)))
plt.show()
I wanted to know how can I add the colorbar labels (numbers from 1-31) to the actual clusters on the graph (as text) that each one corresponds to. This is because it is quite hard to tell this from the colours as they loop back to red.
I tried:
n = list(final_df2['cluster'])
for i, txt in enumerate(n):
ax.annotate(txt, (y[i], x[i]))
But this is giving me no luck.
Your code for the annotations is writing an annotation for each and every dot. This just ends in a sea of numbers.
Somehow, you should find a kind of center for each cluster, for example by averaging all the points that belong to the same cluster.
Then, you use the coordinates of the center to position the text. You can give it a background to make it easier to read.
As I don't have your data, the code below simulates some points already around a center.
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# calculate some random points to serve as cluster centers; run a few steps of a relaxing algorithm to separate them a bit
def random_distibuted_centers():
cx = np.random.uniform(-10, 10, MAX_CLUST + 1)
cy = np.random.uniform(-10, 10, MAX_CLUST + 1)
for _ in range(10):
for i in range(1, MAX_CLUST + 1):
for j in range(1, MAX_CLUST + 1):
if i != j:
dist = np.linalg.norm([cx[i] - cx[j], cy[i] - cy[j]])
if dist < 4:
cx[i] += 0.4 * (cx[i] - cx[j]) / dist
cy[i] += 0.4 * (cy[i] - cy[j]) / dist
return cx, cy
N = 1000
MAX_CLUST = 31
cx, cy = random_distibuted_centers()
# for demonstration purposes, just generate some random points around the centers
x = np.concatenate( [np.random.normal(cx[i], 2, N) for i in range(1,MAX_CLUST+1)])
y = np.concatenate( [np.random.normal(cy[i], 2, N) for i in range(1,MAX_CLUST+1)])
communities = np.repeat(range(1,MAX_CLUST+1), N)
final_df2 = pd.DataFrame({'x':x, 'y':y, 'cluster': communities})
no_clusters = max(communities)
cluster_list = list(range (min(communities), no_clusters+1))
fig2, ax = plt.subplots(figsize = (20,15))
plt.scatter(x,y, c=final_df2['cluster'], cmap=plt.cm.get_cmap('hsv', max(cluster_list)), s=0.5)
plt.title('Phenograph on UMAP - All Markers (auto)', fontsize=15)
plt.xlabel('umap_1', fontsize=15)
plt.ylabel('umap_2', fontsize=15)
plt.colorbar(extend='both',ticks = cluster_list)
bbox_props = dict(boxstyle="circle,pad=0.3", fc="white", ec="black", lw=2, alpha=0.9)
for i in range(1,MAX_CLUST+1):
ax.annotate(i, xy=(cx[i], cy[i]), ha='center', va='center', bbox=bbox_props)
plt.show()

Categories