How to align text with ylabel in matplotlib? - python

I want to add the text "a)", "b)" and "c)" to subfigures and align it with the y-axis label.
Let's say we have a simple plot whose y-axis tick labels vary in width, e.g. "0.8", "0.08", "0.016". This changes the distance between x = 0 in ax[i].transAxes coordinates and the ylabel.
import matplotlib.pyplot as plt
import numpy as np

# Sample data: the same sine curve is rescaled in each panel so that the
# y tick labels ("0.8", "0.08", "0.016") have different widths.
x = np.arange(0, 1, 0.01)
y = np.sin(x)

fig, ax = plt.subplots(1, 3, figsize=(6, 4))
ax[0].plot(x, y)
ax[1].plot(x, y * 0.1)
ax[2].plot(x, y * 0.02)
for i in range(3):
    ax[i].spines['top'].set_visible(False)
    ax[i].spines['right'].set_visible(False)
    ax[i].set_ylabel('Sine')
    ax[i].set_xlabel('x')
plt.tight_layout()

# Place "a)", "b)", "c)" at the top-left corner of each axes, expressed in
# axes coordinates (0, 1).
abc = 'abc'
for i in range(3):
    ax[i].text(0, 1, abc[i] + ')', transform=ax[i].transAxes)
plt.show()
Currently I am trying to find the (x,y) position by trial and error to right align "a)", "b)" or "c)" with the ylabel. Is there a better way to do this?
If I try to get the position of ylabel using,
ylbl=ax[i].set_ylabel('Sine')
print(ylbl.get_position())
I get (0,0.5), which is not really helpful.
Even more bizarre is what happens when I call tight_layout().
# Keep the Text objects returned by set_ylabel() so that their positions can
# be printed after tight_layout() has been applied.
ylbls = []
for i in range(3):
    ylbls.append(ax[i].set_ylabel('Sine'))
plt.tight_layout()
for i in range(3):
    print(ylbls[i].get_position())
I get the values (37.7,0.5), (201.8,0.5), (365.9,0.5). I have no idea what 37.7, 201.8 and 365.9 mean, or whether I can use them to align my text with the ylabel somehow.

First, get the bounding box of the y-label, then build a blended transform: the x coordinate is taken from the y-label's bounding box, while the y coordinate uses ax[i].transAxes.
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.transforms import IdentityTransform
import matplotlib.transforms as transforms

x = np.arange(0, 1, 0.01)
y = np.sin(x)

fig, ax = plt.subplots(1, 3, figsize=(7, 5))
ax[0].plot(x, y)
ax[1].plot(x, y * 0.1)
ax[2].plot(x, y * 0.02)
for i in range(3):
    ax[i].spines['top'].set_visible(False)
    ax[i].spines['right'].set_visible(False)
    ax[i].set_ylabel('Sine')
    ax[i].set_xlabel('x')
plt.tight_layout()
# Render once so that the y-label bounding boxes queried below reflect the
# final layout.
fig.canvas.draw()

abc = r'abc'
to_figure = fig.transFigure.inverted()
for i in range(3):
    iax = ax[i]
    # Bounding box of the y-label in display (pixel) coordinates.
    bb = iax.yaxis.label.get_window_extent()
    # Convert the left edge (minus a 3 px gap) to figure-relative
    # coordinates.  Raw pixel values are only valid at the dpi used for the
    # draw above, so they would be misplaced by savefig(dpi=300).
    x_fig = to_figure.transform((bb.x0 - 3, 0))[0]
    # x is interpreted in figure coordinates, y in this axes' coordinates.
    trans = transforms.blended_transform_factory(fig.transFigure, iax.transAxes)
    iax.text(x_fig, 1.01, abc[i] + ')', ha='left', fontsize=14, transform=trans)
plt.savefig('output_text.png', dpi=300)

Related

Overflow x label MatPlotLib

I have this bar graph but the X labels that are long keep overflowing into the other label. Is there a way I can create more space or cause a line break when it is doing this?
Below is the part of the code that accounts for the graph
import pandas as pd
import matplotlib.pyplot as plt
# NOTE(review): tweets_df is not defined in this snippet; presumably a
# DataFrame with 'name' and 'tweet_volume' columns — confirm.
ax = tweets_df.plot(kind='bar', x='name', y='tweet_volume', fontsize=7, width=.5)
ax.set_xlabel('Hastag')
ax.set_ylabel('Tweets w/ Hashtag')
# Keep the tick labels horizontal; this is the setting under which the long
# labels are reported to overlap.
plt.xticks(rotation='horizontal')
plt.show()
IMHO you can use rotation=90 instead of rotation='horizontal'; or, if you want to keep the labels horizontal, truncate them to their first N characters:
import pandas as pd
import matplotlib.pyplot as plt

# Maximum number of characters kept from each tick label.
N = 5

ax = tweets_df.plot(kind='bar', x='name', y='tweet_volume', fontsize=7, width=.5)
ax.set_xlabel('Hastag')
ax.set_ylabel('Tweets w/ Hashtag')
plt.xticks(rotation='horizontal')

# Read the current tick label texts and replace each with its first N
# characters.
ax.set_xticklabels([tick.get_text()[:N] for tick in ax.get_xticklabels()])
plt.show()

How to make horizontal linechart with categorical variables and timeseries?

I want to replicate plots from this paper: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5000555/pdf/nihms774453.pdf I'm particularly interested in plot on page 16, right panel. I tried to do this in matplotlib but it seems to me that there is no way to access lines in linecollection.
I don't know how to change the color along each line according to the value at every index. Eventually I'd like to get something like this: https://matplotlib.org/3.1.1/gallery/lines_bars_and_markers/multicolored_line.html but for every line, colored according to the data.
this is what I tried:
the data in numpy array: https://pastebin.com/B1wJu9Nd
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib import colors as mcolors
# IPython/Jupyter magic; this line is not valid in a plain Python script.
%matplotlib inline
# NOTE(review): qq is not defined in this snippet; presumably a DataFrame
# built from the linked data — confirm.
# x positions 0 .. qq.index.max(), shared by every line.
base_range = np.arange(qq.index.max()+1)
fig, ax = plt.subplots(figsize=(12,8))
ax.set_xlim(qq.index.min(), qq.index.max())
# ax.set_ylim(qq.columns[0], qq.columns[-1])
ax.set_ylim(-5, len(qq.columns) +5)
# One horizontal line per column of qq, drawn at y = the column's position.
line_segments = LineCollection([np.column_stack([base_range, [y]*len(qq.index)]) for y in range(len(qq.columns))],
cmap='viridis',
linewidths=(5),
linestyles='solid',
)
# NOTE(review): the array passed here is what gets mapped through cmap; it
# holds the x positions rather than values from qq, and a LineCollection
# appears to take one value per line, not per point — confirm.
line_segments.set_array(base_range)
ax.add_collection(line_segments)
axcb = fig.colorbar(line_segments)
plt.show()
my result:
what I want to achieve:

how to reduce y-axis in matplot with same distance

I want this plot's y-axis to be centered at 38, and the y-axis scaled such that the 'humps' disappear. How do I accomplish this?
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Dates (day/month/year) used as the x values.
s = ['05/02/2019', '06/02/2019', '07/02/2019', '08/02/2019',
     '09/02/2019', '10/02/2019', '11/02/2019', '12/02/2019',
     '13/02/2019', '20/02/2019', '21/02/2019', '22/02/2019',
     '23/02/2019', '24/02/2019', '25/02/2019']
# NOTE(review): df is not created in this snippet; presumably an existing
# DataFrame — confirm.
df[0] = ['38.02', '33.79', '34.73', '36.47', '35.03', '33.45',
         '33.82', '33.38', '34.68', '36.93', '33.44', '33.55',
         '33.18', '33.07', '33.17']
# Data for plotting
fig, ax = plt.subplots(figsize=(17, 2))
# Annotate every point with its value, slightly above the point.
# NOTE(review): the values in df[0] are strings, so j+0.8 looks like it would
# raise TypeError unless they are converted to numbers first — confirm.
for i, j in zip(s, df[0]):
    ax.annotate(str(j), xy=(i, j+0.8))
ax.plot(s, df[0])
ax.set(xlabel='Dates', ylabel='Latency',
       title='Hongkong to sing')
ax.grid()
#plt.yticks(np.arange(min(df[p]), max(df[p])+1, 2))
fig.savefig("test.png")
plt.show()
I'm not entirely certain if this is what you're looking for but you can adjust the y-limits explicitly to change the scale, i.e.
ax.set_ylim([ax.get_ylim()[0], 42])
Which only sets the upper bound, leaving the lower limit unchanged, this would give you
you can supply any values you find appropriate, i.e.
ax.set_ylim([22, 52])
will give you something that looks like
Also note that the tick labels and general appearance of your plot will differ from what is shown here.
Edit - Here is the complete code as requested:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

df = pd.DataFrame()
# Dates are written day/month/year.
s = ['05/02/2019', '06/02/2019', '07/02/2019', '08/02/2019',
     '09/02/2019', '10/02/2019', '11/02/2019', '12/02/2019',
     '13/02/2019', '20/02/2019', '21/02/2019', '22/02/2019',
     '23/02/2019', '24/02/2019', '25/02/2019']
df[0] = ['38.02', '33.79', '34.73', '36.47', '35.03', '33.45',
         '33.82', '33.38', '34.68', '36.93', '33.44', '33.55',
         '33.18', '33.07', '33.17']
# Data for plotting
fig, ax = plt.subplots(figsize=(17, 3))
#for i,j in zip(s,df[0]):
# ax.annotate(str(j),xy=(i,j+0.8))
# Convert the latency strings to numbers so the y-axis is numeric.
ax.plot(s, pd.to_numeric(df[0]))
ax.set(xlabel='Dates', ylabel='Latency',
       title='Hongkong to sing')
# Parse with an explicit day-first format: without it '05/02/2019' is read
# month-first while '13/02/2019' cannot be, which gives inconsistent labels
# or a parsing error depending on the pandas version.
tick_labels = pd.to_datetime(s, format='%d/%m/%Y').strftime('%m.%d')
# The string x values are plotted at positions 0..len(s)-1; pin the ticks
# there before assigning one label per tick.
ax.set_xticks(range(len(s)))
ax.set_xticklabels(tick_labels, rotation=45)
ax.set_ylim([22, 52])
plt.show()

Pyplot set_xticks doesn't work as expected

I want to set the x tick density by specifying how many ticks to skip each time. For example, if the x axis is labelled by 100 consecutive dates, and I want to skip every 10 dates, then I will do something like
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# 100 consecutive daily periods starting 2006-01-01, formatted as "YYYYMMDD"
# strings.
ts = pd.period_range("20060101", periods=100).strftime("%Y%m%d")
y = np.random.randn(100)
ax = plt.subplot(1, 1, 1)
ax.plot(ts, y)
# Keep every 10th of the tick locations currently reported by the axis; this
# is the step whose result is reported as misplaced.
xticks = ax.get_xticks()
ax.set_xticks(xticks[::10])
plt.xticks(rotation="vertical")
plt.show()
However, the output is wrong. Pyplot only picks the first few ticks and places them all in the wrong positions, although the spacing is correct:
What can I do to get the desired output? Namely the ticks should be instead:
['20060101' '20060111' '20060121' '20060131' '20060210' '20060220'
'20060302' '20060312' '20060322' '20060401']
@klim's answer seems to put the correct marks on the axis, but the labels still won't show. An example where the date axis is correctly marked yet without labels:
Set xticklabels also. Like this.
xticks = ax.get_xticks()
xticklabels = ax.get_xticklabels()
ax.set_xticks(xticks[::10])
ax.set_xticklabels(xticklabels[::10], rotation=90)
Forget the above, which doesn't work.
How about this?
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Date strings are used only as tick labels; the data is plotted against
# plain integer positions.
ts = pd.period_range("20060101", periods=100).strftime("%Y%m%d")
x = np.arange(len(ts))
y = np.random.randn(100)

ax = plt.subplot(1, 1, 1)
ax.plot(x, y)

# Show every 10th position and label it with the matching date string.
every_tenth = slice(None, None, 10)
ax.set_xticks(x[every_tenth])
ax.set_xticklabels(ts[every_tenth], rotation="vertical")
plt.show()
This works on my machine.

Set the maximum number of rows in legend

I need to draw several datasets within a single plot. The number of datasets varies, so I don't know a priori how many there will be.
If I just draw the legends, I get this (MCVE below):
How can I tell plt.legend() to draw only, say, the first 10 legend entries? I've looked around plt.legend() but there seems to be no argument to set such a value.
MCVE:
import numpy as np
import matplotlib.pyplot as plt

# 20 random (x, y) points, one per dataset.
dataset = []
for _ in range(20):
    dataset.append(np.random.uniform(0, 1, 2))
lbl = ['adfg', 'dfgb', 'cgfg', 'rtbd', 'etryt', 'frty', 'jklg', 'jklh',
       'ijkl', 'dfgj', 'kbnm', 'bnmbl', 'qweqw', 'fghfn', 'dfg', 'hjt', 'dfb',
       'sdgdas', 'werwe', 'dghfg']
# Each point is its own scatter call with its own label, so the legend gets
# one entry per dataset.
for i, xy in enumerate(dataset):
    plt.scatter(xy[0], xy[1], label=lbl[i])
plt.legend()
plt.savefig('test.png')
You can just limit the number of labels shown.
import matplotlib.pyplot as plt

# Number of entries to show in the legend.
maxn = 16
for i in range(25):
    # i // maxn is 0 for the first maxn items and >= 1 afterwards, so only
    # the later labels receive a leading "_".
    plt.scatter(.5, .5, label=(i//maxn)*"_"+str(i))
plt.legend()
plt.show()
This method works also for text labels of course:
import numpy as np
import matplotlib.pyplot as plt

# 25 random 8-letter labels.
labels = ["".join(np.random.choice(list("ABCDEFGHIJK"), size=8)) for k in range(25)]
# Number of entries to show in the legend.
maxn = 16
for i, l in enumerate(labels):
    # Labels from index maxn onwards get a leading "_".
    plt.scatter(.5, .5, label=(i//maxn)*"_"+l)
plt.legend()
plt.show()
The reason this works is that labels starting with "_" are ignored in the legend. This is used internally to give objects a label without showing them in the legend but can of course also be used by us to limit the number of elements in the legend.
I would like to suggest an alternative way to get your desired output, which I feel relies less on a "hack" of the legend labels.
You can use the function Axes.get_legend_handles_labels() to get a list of the handles and the labels of the objects that are to be put in the legend.
You can truncate these lists however you feel like, before passing them to plt.legend(). For instance:
import numpy as np
import matplotlib.pyplot as plt

# 20 random (x, y) points, one per dataset.
dataset = []
for _ in range(20):
    dataset.append(np.random.uniform(0, 1, 2))
lbl = ['adfg', 'dfgb', 'cgfg', 'rtbd', 'etryt', 'frty', 'jklg', 'jklh',
       'ijkl', 'dfgj', 'kbnm', 'bnmbl', 'qweqw', 'fghfn', 'dfg', 'hjt', 'dfb',
       'sdgdas', 'werwe', 'dghfg']
fig, ax = plt.subplots()
for i, xy in enumerate(dataset):
    ax.scatter(xy[0], xy[1], label=lbl[i])
# Fetch the handles and labels, then pass only the first three of each to
# the legend.
h, l = ax.get_legend_handles_labels()
plt.legend(h[:3], l[:3]) # <<<<<<<< This is where the magic happens
plt.show()
You could even display every other label plt.legend(h[::2], l[::2]) or whatever else you want.

Categories