ternary plots as subplot

ternary plots as subplot - python

I want to draw multiple ternary graphs and thought to do this using matplotlib's subplot.
I'm just getting empty 'regular' plots though, not the ternary graphs I want in there. I found the usage of
figure, ax = plt.subplots()
tax = ternary.TernaryAxesSubplot(ax=ax)
so this seems to be possible, but can't really find out how to get this working. Any ideas?
Code I'm using:
I'm using a for loop as the data has columns named tria1-a, tria2-a, etc for the different triads
import ternary
import matplotlib.pyplot as plt
import pandas as pd
#configure file to import.
filename = 'somecsv.csv'
filelocation = 'location'
dfTriad = pd.read_csv(filelocation+filename)
# plot the data
scale = 33
figure, ax = plt.subplots()
tax = ternary.TernaryAxesSubplot(ax=ax, scale=scale)
figure.set_size_inches(10, 10)
tax.set_title("Scatter Plot", fontsize=20)
tax.boundary(linewidth=2.0)
tax.gridlines(multiple=1, color="blue")
tax.legend()
tax.ticks(axis='lbr', linewidth=1, multiple=5)
tax.clear_matplotlib_ticks()
#extract the xyz columns for the triads from the full dataset
for i in range(1,6) :
key_x = 'tria'+ str(i) + '-a'
key_y = 'tria' + str(i) + '-b'
key_z = 'tria' + str(i) + '-c'
#construct dataframe from the extracted xyz columns
dfTriad_data = pd.DataFrame(dfTriad[key_x], columns=['X'])
dfTriad_data['Y'] = dfTriad[key_y]
dfTriad_data['Z'] = dfTriad[key_z]
#create list of tuples from the constructed dataframe
triad_data = [tuple(x) for x in dfTriad_data.to_records(index=False)]
plt.subplot(2, 3, i)
tax.scatter(triad_data, marker='D', color='green', label="")
tax.show()

I had the same problem and could solve it by first "going" into the subplot, then creating the ternary figure in there by giving plt.gca() as keyword argument ax:
plt.subplot(2,2,4, frameon = False)
scale = 10
plt.gca().get_xaxis().set_visible(False)
plt.gca().get_yaxis().set_visible(False)
figure, tax = ternary.figure(ax = plt.gca(), scale = scale)
#now you can use ternary normally:
tax.line(scale * np.array((0.5,0.5,0.0)), scale*np.array((0.0, 0.5, 0.5)))
tax.boundary(linewidth=1.0)
#...

Related

Python: Changing visual parameters of ptitprince repo derived from seaborn and matplotlib

I am using a github repository called ptitprince, which is derived from seaborn and matplotlib, to generate graphs.
For example, this is the code using the ptitprince repo:
# coding: utf8
import pandas as pd
import ptitprince as pt
import seaborn as sns
import os
import matplotlib.pyplot as plt
#sns.set(style="darkgrid")
#sns.set(style="whitegrid")
#sns.set_style("white")
sns.set(style="whitegrid",font_scale=2)
import matplotlib.collections as clt
df = pd.read_csv ("u118phag.csv", sep= ",")
df.head()
savefigs = True
figs_dir = 'figs'
if savefigs:
# Make the figures folder if it doesn't yet exist
if not os.path.isdir('figs'):
os.makedirs('figs')
#automation
f, ax = plt.subplots(figsize=(4, 5))
#f.subplots_adjust(hspace=0,wspace=0)
dx = "Treatment"; dy = "score"; ort = "v"; pal = "Set2"; sigma = .2
ax=pt.RainCloud(x = dx, y = dy, data = df, palette = pal, bw = sigma,
width_viol = .6, ax = ax, move=.2, offset=.1, orient = ort, pointplot = True)
f.show()
if savefigs:
f.savefig('figs/figure20.png', bbox_inches='tight', dpi=500)
which generates the following graph
The raw code not using ptitprince is as follows and produces the same graph as above:
# coding: utf8
import pandas as pd
import ptitprince as pt
import seaborn as sns
import os
import matplotlib.pyplot as plt
#sns.set(style="darkgrid")
#sns.set(style="whitegrid")
#sns.set_style("white")
sns.set(style="whitegrid",font_scale=2)
import matplotlib.collections as clt
df = pd.read_csv ("u118phag.csv", sep= ",")
df.head()
savefigs = True
figs_dir = 'figs'
if savefigs:
# Make the figures folder if it doesn't yet exist
if not os.path.isdir('figs'):
os.makedirs('figs')
f, ax = plt.subplots(figsize=(7, 5))
dy="Treatment"; dx="score"; ort="h"; pal = sns.color_palette(n_colors=1)
#adding color
pal = "Set2"
f, ax = plt.subplots(figsize=(7, 5))
ax=pt.half_violinplot( x = dx, y = dy, data = df, palette = pal, bw = .2, cut = 0.,
scale = "area", width = .6, inner = None, orient = ort)
ax=sns.stripplot( x = dx, y = dy, data = df, palette = pal, edgecolor = "white",
size = 3, jitter = 1, zorder = 0, orient = ort)
ax=sns.boxplot( x = dx, y = dy, data = df, color = "black", width = .15, zorder = 10,\
showcaps = True, boxprops = {'facecolor':'none', "zorder":10},\
showfliers=True, whiskerprops = {'linewidth':2, "zorder":10},\
saturation = 1, orient = ort)
if savefigs:
f.savefig('figs/figure21.png', bbox_inches='tight', dpi=500)
Now, what I'm trying to do is to figure out how to modify the graph so that I can (1) move the plots closer together, so there is not so much white space between them, and (2) shift the x-axis to the right, so that I can make the distribution (violin) plot wider without it getting cut in half by the y-axis.
I have tried to play around with subplots_adjust() as you can see in the first box of code, but I receive an error. I cannot figure out how to appropriately use this function, or even if that will actually bring the different graphs closer together.
I also know that I can increase the distribution size by increasing this value width = .6, but if I increase it too high, the distribution plot begins to being cut off by the y-axis. I can't figure out if I need to adjust the overall plot using the plt.subplots,or if I need to move each individual plot.
Any advice or recommendations on how to change the visuals of the graph? I've been staring at this for awhile, and I can't figure out how to make seaborn/matplotlib play nicely with ptitprince.

You may try to change the interval of X-axis being shown using ax.set_xbound (put a lower value than you currently have for the beginning).

Python/Matplotlib - Find the highest value of a group of bars

I have this image from Matplotlib :
I would like to write for each category (cat i with i in [1-10] in the figure) the highest value and its corresponding legend on the graphic.
Below you can find visually what I would like to achieve :
The thing is the fact that I don't know if it is possible because of the way of plotting from matplotlib.
Basically, this is the part of the code for drawing multiple bars :
# create plot
fig, ax = plt.subplots(figsize = (9,9))
index = np.arange(len_category)
if multiple:
bar_width = 0.3
else :
bar_width = 1.5
opacity = 1.0
#test_array contains test1 and test2
cmap = get_cmap(len(test_array))
for i in range(len(test_array)):
count = count + 1
current_label = test_array[i]
rects = plt.bar(index-0.2+bar_width*i, score_array[i], bar_width, alpha=opacity, color=np.random.rand(3,1),label=current_label )
plt.xlabel('Categories')
plt.ylabel('Scores')
plt.title('Scores by Categories')
plt.xticks(index + bar_width, categories_array)
plt.legend()
plt.tight_layout()
plt.show()
and this is the part I have added in order to do what I would like to achieve. But it searches the max across all the bars in the graphics. For example, the max of test1 will be in cat10 and the max of test2 will be cat2. Instead, I would like to have the max for each category.
for i in range(len(test_array)):
count = count + 1
current_label = test_array[i]
rects = plt.bar(index-0.2+bar_width*i, score_array[i], bar_width,alpha=opacity,color=np.random.rand(3,1),label=current_label )
max_score_current = max(score_array[i])
list_rect = list()
max_height = 0
#The id of the rectangle who get the highest score
max_idx = 0
for idx,rect in enumerate(rects):
list_rect.append(rect)
height = rect.get_height()
if height > max_height:
max_height = height
max_idx = idx
highest_rect = list_rect[max_idx]
plt.text(highest_rect.get_x() + highest_rect.get_width()/2.0, max_height, str(test_array[i]),color='blue', fontweight='bold')
del list_rect[:]
Do you have an idea about how I can achieve that ?
Thank you

It usually better to keep data generation and visualization separate. Instead of looping through the bars themselves, just get the necessary data prior to plotting. This makes everything a lot more simple.
So first create a list of labels to use and then loop over the positions to annotate then. In the code below the labels are created by mapping the argmax of a column array to the test set via a dictionary.
import numpy as np
import matplotlib.pyplot as plt
test1 = [6,4,5,8,3]
test2 = [4,5,3,4,6]
labeldic = {0:"test1", 1:"test2"}
a = np.c_[test1,test2]
maxi = np.max(a, axis=1)
l = ["{} {}".format(i,labeldic[j]) for i,j in zip(maxi, np.argmax(a, axis=1))]
for i in range(a.shape[1]):
plt.bar(np.arange(a.shape[0])+(i-1)*0.3, a[:,i], width=0.3, align="edge",
label = labeldic[i])
for i in range(a.shape[0]):
plt.annotate(l[i], xy=(i,maxi[i]), xytext=(0,10),
textcoords="offset points", ha="center")
plt.margins(y=0.2)
plt.legend()
plt.show()

From your question it is not entirely clear what you want to achieve, but assuming that you want the relative height of each bar in one group printed above that bar, here is one way to achieve that:
from matplotlib import pyplot as plt
import numpy as np
score_array = np.random.rand(2,10)
index = np.arange(score_array.shape[1])
test_array=['test1','test2']
opacity = 1
bar_width = 0.25
for i,label in enumerate(test_array):
rects = plt.bar(index-0.2+bar_width*i, score_array[i], bar_width,alpha=opacity,label=label)
heights = [r.get_height() for r in rects]
print(heights)
rel_heights = [h/max(heights) for h in heights]
idx = heights.index(max(heights))
for i,(r,h, rh) in enumerate(zip(rects, heights, rel_heights)):
plt.text(r.get_x() + r.get_width()/2.0, h, '{:.2}'.format(rh), color='b', fontweight ='bold', ha='center')
plt.show()
The result looks like this:

Linear Regression: Extending line past data and adding a legend

I have a code:
import math
import numpy as np
import pylab as plt1
from matplotlib import pyplot as plt
uH2 = 1.90866638
uHe = 3.60187307
eH2 = 213.38
eHe = 31.96
R = float(uH2*eH2)/(uHe*eHe)
C_Values = []
Delta = []
kHeST = []
J_f21 = []
data = np.genfromtxt("Lamda_HeHCL.txt", unpack=True);
J_i1=data[1];
J_f1=data[2];
kHe=data[7]
data = np.genfromtxt("Basecol_Basic_New_1.txt", unpack=True);
J_i2=data[0];
J_f2=data[1];
kH2=data[5]
print kHe
print kH2
kHe = map(float, kHe)
kH2 = map(float, kH2)
kHe = np.array(kHe)
kH2= np.array(kH2)
g = len(kH2)
for n in range(0,g):
if J_f2[n] == 1:
Jf21 = J_f2[n]
J_f21.append(Jf21)
ratio = kHe[n]/kH2[n]
C = (((math.log(float(kH2[n]),10)))-(math.log(float(kHe[n]),10)))/math.log(R,10)
C_Values.append(C)
St = abs(J_f1[n] - J_i1[n])
Delta.append(St)
print C_Values
print Delta
print J_f21
fig, ax = plt.subplots()
ax.scatter(Delta,C_Values)
for i, txt in enumerate(J_f21):
ax.annotate(txt, (Delta[i],C_Values[i]))
plt.plot(np.unique(Delta), np.poly1d(np.polyfit(Delta, C_Values, 1))(np.unique(Delta)))
plt.plot(Delta, C_Values)
fit = np.polyfit(Delta,C_Values,1)
fit_fn = np.poly1d(fit)
# fit_fn is now a function which takes in x and returns an estimate for y
plt.scatter(Delta,C_Values, Delta, fit_fn(Delta))
plt.xlim(0, 12)
plt.ylim(-3, 3)
In this code, I am trying to plot a linear regression that extends past the data and touches the x-axis. I am also trying to add a legend to the plot that shows the slope of the plot. Using the code, I was able to plot this graph.
Here is some trash data I have been using to try and extend the line and add a legend to my code.
x =[5,7,9,15,20]
y =[10,9,8,7,6]
I would also like it to be a scatter except for the linear regression line.

Given that you don't provide the data you're loading from files I was unable to test this, but off the top of my head:
To extend the line past the plot, you could turn this line
plt.plot(np.unique(Delta), np.poly1d(np.polyfit(Delta, C_Values, 1))(np.unique(Delta)))
Into something like
x = np.linspace(0, 12, 50) # both 0 and 12 are from visually inspecting the plot
plt.plot(x, np.poly1d(np.polyfit(Delta, C_Values, 1))(x))
But if you want the line extended to the x-axis,
polynomial = np.polyfit(Delta, C_Values, 1)
x = np.linspace(0, *np.roots(polynomial))
plt.plot(x, np.poly1d(polynomial)(x))
As for the scatter plot thing, it seems to me you could just remove this line:
plt.plot(Delta, C_Values)
Oh right, as for the legend, add a label to the plots you make, like this:
plt.plot(x, np.poly1d(polynomial)(x), label='Linear regression')
and add a call to plt.legend() just before plt.show().

python scatter plot with errorbars and colors mapping a physical quantity

I'm trying to do a quite simple scatter plot with error bars and semilogy scale. What is a little bit different from tutorials I have found is that the color of the scatterplot should trace a different quantity. On one hand, I was able to do a scatterplot with the errorbars with my data, but just with one color. On the other hand, I realized a scatterplot with the right colors, but without the errorbars.
I'm not able to combine the two different things.
Here an example using fake data:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
n=100
Lx_gas = 1e40*np.random.random(n) + 1e37
Tx_gas = np.random.random(n) + 0.5
Lx_plus_error = Lx_gas
Tx_plus_error = Tx_gas/2.
Tx_minus_error = Tx_gas/4.
#actually positive numbers, this is the quantity that should be traced by the
#color, in this example I use random numbers
Lambda = np.random.random(n)
#this is actually different from zero, but I want to be sure that this simple
#code works with the log axis
Lx_minus_error = np.zeros_like(Lx_gas)
#normalize the color, to be between 0 and 1
colors = np.asarray(Lambda)
colors -= colors.min()
colors *= (1./colors.max())
#build the error arrays
Lx_error = [Lx_minus_error, Lx_plus_error]
Tx_error = [Tx_minus_error, Tx_plus_error]
##--------------
##important part of the script
##this works, but all the dots are of the same color
#plt.errorbar(Tx_gas, Lx_gas, xerr = Tx_error,yerr = Lx_error,fmt='o')
##this is what is should be in terms of colors, but it is without the error bars
#plt.scatter(Tx_gas, Lx_gas, marker='s', c=colors)
##what I tried (and failed)
plt.errorbar(Tx_gas, Lx_gas, xerr = Tx_error,yerr = Lx_error,\
color=colors, fmt='o')
ax = plt.gca()
ax.set_yscale('log')
plt.show()
I even tried to plot the scatterplot after the errorbar, but for some reason everything plotted on the same window is put in background with respect to the errorplot.
Any ideas?
Thanks!

You can set the color to the LineCollection object returned by the errorbar as described here.
from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
n=100
Lx_gas = 1e40*np.random.random(n) + 1e37
Tx_gas = np.random.random(n) + 0.5
Lx_plus_error = Lx_gas
Tx_plus_error = Tx_gas/2.
Tx_minus_error = Tx_gas/4.
#actually positive numbers, this is the quantity that should be traced by the
#color, in this example I use random numbers
Lambda = np.random.random(n)
#this is actually different from zero, but I want to be sure that this simple
#code works with the log axis
Lx_minus_error = np.zeros_like(Lx_gas)
#normalize the color, to be between 0 and 1
colors = np.asarray(Lambda)
colors -= colors.min()
colors *= (1./colors.max())
#build the error arrays
Lx_error = [Lx_minus_error, Lx_plus_error]
Tx_error = [Tx_minus_error, Tx_plus_error]
sct = plt.scatter(Tx_gas, Lx_gas, marker='s', c=colors)
cb = plt.colorbar(sct)
_, __ , errorlinecollection = plt.errorbar(Tx_gas, Lx_gas, xerr = Tx_error,yerr = Lx_error, marker = '', ls = '', zorder = 0)
error_color = sct.to_rgba(colors)
errorlinecollection[0].set_color(error_color)
errorlinecollection[1].set_color(error_color)
ax = plt.gca()
ax.set_yscale('log')
plt.show()

matplotlib twinx xticks at specific locations

I have the code below, in the x-axis, i want to show only the parameter values for which i have the metric values which are 5,10,20 and 50.
I want the parameter values to span the x-axis.
How I can do it ?.
import matplotlib.pyplot as plt;
import numpy as np;
from matplotlib import rc;
fig, ax1 = plt.subplots();
rc('mathtext', default='regular');
x = np.array([5,10,20,50]);
cg1 = np.array([0.1,0.3,0.5,0.8]);
cg2 = np.array([0.2,0.2,0.4,0.7]);
cg3 = np.array([0.3,0.4,0.6,0.6]);
lns1 = ax1.plot(x,cg1,'b*:',label='1 CG');
lns2 = ax1.plot(x,cg2,'bo--',label='2 CG');
lns3 = ax1.plot(x,cg3,'bs-',label='3 CG');
ax1.set_ylabel('CG',color='b');
ax1.set_ylim([0,1]);
ax1.set_xlim([4,55]);
ax1.set_xticklabels([5,10,20,50]);
ax1.set_xlabel('K');
ax2 = ax1.twinx();
ld1 = np.array([0.8,0.5,0.2,0.2]);
ld2 = np.array([0.6,0.2,0.3,0.2]);
ld3 = np.array([0.2,0.4,0.6,0.2]);
lns4 = ax2.plot(x,ld1,'k*:',label='1 ld');
lns5 = ax2.plot(x,ld2,'ko--',label='2 ld');
lns6 = ax2.plot(x,ld3,'ks-',label='3 ld');
lns = lns1 + lns2 + lns3 + lns4 + lns5 + lns6;
labs = [l.get_label() for l in lns];
ax1.legend(lns, labs, loc='best', ncol=2);
ax2.set_ylabel('LD',color='k');
ax2.set_ylim([0,1]);
ax2.set_xlim([4,55]);
plt.show();

Try replacing line with ax1.set_xlim([4,55]) with this line:
ax1.set_xticks(x)
You may also want to remove ax2.set_xlim(...).
Does it give you what you expected?
UPDATE Following comments:
Please use these lines (NOTE: the order matters!):
ax1.set_xlim([4,55]);
ax1.set_xticks(x)
...
ax2.set_xlim([4,55]);
ax2.set_xticks(x)
And remove anything else that touches xticks, like any of these:
ax1.set_xticklabels([5,10,20,50]);
ax2.set_xticklabels([5,10,20,50]);
This should produce a chart like this:
Which has limits at [4, 55] and only selected tick values visible.

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

ternary plots as subplot - python

Related

Python: Changing visual parameters of ptitprince repo derived from seaborn and matplotlib

Python/Matplotlib - Find the highest value of a group of bars

Linear Regression: Extending line past data and adding a legend

python scatter plot with errorbars and colors mapping a physical quantity

matplotlib twinx xticks at specific locations

Categories

Resources