1) I am not able to see the text-based xticks which are stored as list in the variable x. When I have only one single column based bar plot, I can see the xticks as text but not for more.
2)how can I control the font properties of xticks and the values in y axis?
Thank you.
import matplotlib.pyplot as plt
import pylab as pl
import numpy as np
#load text and columns into different variables
data = np.genfromtxt('a', names=True, dtype=None, usecols=("X", "N2", "J2", "V2", "asd", "xyz"))
x = data['X']
n = data['N2']
j = data['J2']
v = data['V2']
#make x axis string based labels
r=np.arange(1,25,1.5)
plt.xticks(r,x) #make sure dimension of x and n matches
plt.figure(figsize=(3.2,2), dpi=300, linewidth=3.0)
ax = plt.subplot(111)
ax.bar(r,v,width=0.9,color='red',edgecolor='black', lw=0.5, align='center')
plt.axhline(y=0,linewidth=1.0,color='black') #horizontal line at y=0
plt.axis([0.5,16.5,-0.4,0.20])
ax.bar(r,j,width=0.6,color='green',edgecolor='black', lw=0.5, align='center')
ax.bar(r,n,width=0.3,color='blue',edgecolor='black', lw=0.5, align='center')
plt.axhline(y=0,linewidth=1,color='black') #horizontal line at y=0
plt.axis([0.5,24.5,-0.36,0.15])
plt.savefig('fig',dpi=300,format='png',orientation='landscape')
The way you're doing it, you just need to move the call to plt.xticks(r,x) to somewhere after you create the figure you're working on. Otherwise pyplot will create a new figure for you.
However, I would also consider switching to the more explicit object-oriented interface to matplotlib.
This way you'd use:
fig, ax = plt.subplots(1,1) # your only call to plt
ax.bar(r,v,width=0.9,color='red',edgecolor='black', lw=0.5, align='center')
ax.bar(r,j,width=0.6,color='green',edgecolor='black', lw=0.5, align='center')
ax.bar(r,n,width=0.3,color='blue',edgecolor='black', lw=0.5, align='center')
ax.set_xticks(r)
ax.set_xticklabels(x)
ax.axhline(y=0,linewidth=1,color='black')
fig.savefig('fig',dpi=300,format='png',orientation='landscape')
# or use plt.show() to see the figure interactively or inline, depending on backend
# (see Joe Kington's comment below)
Related
plt.figure(figsize = (8,5))
sns.countplot(data = HRdfMerged, x = 'Gender', hue='Attrition').set_title('Gender vs Attrition')
I'm having a hard time adding a label to the top of my bar that states the total number. I have tried many different ways but can't get it right. Im using matplotlib. Picture of bar chart added.
Once you have called sns.countplot, we will explore the list ax.patches to get information from the bars and place the texts you want:
# Imports.
import matplotlib.pyplot as plt
import seaborn as sns
# Load a dataset to replicate what you have in the question.
data = sns.load_dataset("titanic")
fig, ax = plt.subplots() # Use the object-oriented approach with Matplotlib when you can.
sns.countplot(data=data, x="class", hue="who", ax=ax)
ax.set_title("title goes here")
fig.show()
# For each bar, grab its coordinates and colors, find a suitable location
# for a text and place it there.
for patch in ax.patches:
x0, y0 = patch.get_xy() # Bottom-left corner.
x0 += patch.get_width()/2 # Middle of the width.
y0 += patch.get_height() # Top of the bar
color = patch.get_facecolor()
ax.text(x0, y0, str(y0), ha="center", va="bottom", color="white", clip_on=True, bbox=dict(ec="black",
fc=color))
Play around with the kwargs of ax.text to get the result you prefer. An alternative:
ax.text(x0, y0, str(y0), ha="center", va="bottom", color=color, clip_on=True)
You can also use the convenient Axes.bar_label method here to do this in just a couple lines.
Since seaborn does not return the BaContainer objects to us, we will need to access them from the Axes object via Axes.containers attribute.
import matplotlib.pyplot as plt
import seaborn as sns
data = sns.load_dataset("titanic")
fig, ax = plt.subplots()
sns.countplot(data=data, x="class", hue="who", ax=ax)
for bar_contain in ax.containers:
ax.bar_label(bar_contain)
I am plotting a certain categorical value over the map of a city. The line of code I use to plot is the following:
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()
urban_data.plot(column="category", cmap="viridis", ax=ax, categorical=True, /
k=4, legend=True, linewidth=0.5, /
legend_kwds={'fontsize':'19', 'loc':'lower left'})
where urban data is a geopandas dataframe, and I am using matplotlib as plotting library. The argument legend_kwds allows me to control minor things on the legend, like the position or the font size, but I cannot decide major things like, for example, the order of the entries in the legend box. In fact my categories are ranked, let's say 1-2-3-4, but I always get them displayed in a different order.
Is it possible to have more control over the legend? For example by calling it outside the gdf.plot() function? And, if so, how do I match the colors in the legend with those in the map, which are discrete values (that I don't know exactly) of a viridis colormap?
EDIT: here is a verifiable example. Unfortunately shapefiles need other files to work, and here a geometry (an area, not a point) column is needed, so I have to ask you to download this shpfile of the US. Everything you need is within this folder. Here's the code to reproduce the issue. The plot in output is bad because I did not care about the coordinates system here, but the important thing is the legend.
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
gdf=gpd.read_file('.../USA_adm1.shp')
clusters=np.random.randint(0,4, size=52)
gdf['cluster']=clusters
clusdict={1: 'lower-middle', 2: 'upper-middle', 3: 'upper', 0: 'lower'}
gdf['cluster']=gdf['cluster'].map(clusdict)
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()
gdf.plot(column='cluster',cmap='viridis', categorical=True, legend=True, ax=ax)
The bad news is that categories in legends produced by geopandas are sorted and this is hardcoded (see source-code here).
One solution is hence to have the categorical column such that if it is sorted, it would correspond to the desired order. Using integers seems fine for that. Then one can replace the names in the legend, once it is produced in the correct order.
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
gdf=gpd.read_file('data/USA_adm/USA_adm1.shp')
clusters=np.random.randint(0,4, size=52)
gdf['cluster']=clusters
clusdict={1: 'lower-middle', 2: 'upper-middle', 3: 'upper', 0: 'lower'}
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()
gdf.plot(column='cluster',cmap='viridis', categorical=True, legend=True, ax=ax)
def replace_legend_items(legend, mapping):
for txt in legend.texts:
for k,v in mapping.items():
if txt.get_text() == str(k):
txt.set_text(v)
replace_legend_items(ax.get_legend(), clusdict)
plt.show()
I had to alter the accepted answer (the second line in the function) from #ImportanceOfBeingErnest a bit to get it to work (maybe there have been updates since),
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
gdf=gpd.read_file('data/USA_adm/USA_adm1.shp')
clusters=np.random.randint(0,4, size=52)
gdf['cluster']=clusters
clusdict={1: 'lower-middle', 2: 'upper-middle', 3: 'upper', 0: 'lower'}
fig = plt.figure(figsize=(12, 12))
ax = plt.gca()
gdf.plot(column='cluster',cmap='viridis', categorical=True, legend=True, ax=ax)
def replace_legend_items(legend, mapping):
for txt in legend.get_texts():
for k,v in mapping.items():
if txt.get_text() == str(k):
txt.set_text(v)
replace_legend_items(ax.get_legend(), clusdict)
plt.show()
Assuming that you have 4 legends, you can do the following to set them in whatever order you like. The following code shows how to put them in the following order (using index): 0, 2, 3, 1.
Here ax is the axis object which you have define using ax = plt.gca()
handles,labels = ax.get_legend_handles_labels()
handles = [handles[0], handles[2], handles[3], handles[1]]
labels = [labels[0], labels[2], labels[3], labels[1]]
ax.legend(handles, labels)
Let me give you an example:
Default order
fig, ax = plt.subplots()
x = np.arange(5)
plt.plot(x, x, label=r'$y=x$')
plt.plot(x, 2*x, label=r'$y=2x$')
plt.plot(x, 3*x, label=r'$y=3x$')
plt.plot(x, 4*x, label=r'$y=4x$')
plt.legend(fontsize=16)
Manually changed order
fig, ax = plt.subplots()
x = np.arange(5)
plt.plot(x, x, label=r'$y=x$')
plt.plot(x, 2*x, label=r'$y=2x$')
plt.plot(x, 3*x, label=r'$y=3x$')
plt.plot(x, 4*x, label=r'$y=4x$')
handles,labels = ax.get_legend_handles_labels()
handles = [handles[0], handles[2],handles[3], handles[1]]
labels = [labels[0], labels[2], labels[3], labels[1]]
ax.legend(handles, labels, fontsize=16)
One can also use list comprehension using a pre-specified order list as
order = [0, 2, 3, 1]
handles,labels = ax.get_legend_handles_labels()
handles = [handles[i] for i in order]
labels = [labels[i] for i in order]
ax.legend(handles, labels, fontsize=16)
Sorry to ask such a basic question, but after hours (and hours) of frustration I'm turning to the list for some expert help.
I have two pandas dataframes, df1 and df2. df1 has columns A and B, while df2 has columns C and D. I want to use matplotlib to make a scatterplot of A vs. B, with labelled axes, and a histogram of C, also with a title on the x axis. Then I want to save both figures in pdf files.
I can accomplish the former with
import matplotlib.pyplot as plt
plt.scatter(df1['A'],df1['B'])
plt.xlabel('X title')
plt.ylabel('Y title')
plt.savefig('myfig1.pdf')
But I can't get the histogram to work, and if it does, it creates a graph with both the scatterplot and the histogram in it.
Any help greatly appreciated.
It sounds like you just need to make another figure for the histogram,
import matplotlib.pyplot as plt
fig1 = plt.figure()
plt.scatter(df1['A'],df1['B'])
plt.xlabel('X title')
plt.ylabel('Y title')
plt.savefig('myfig1.pdf')
fig2 = plt.figure()
... <histogram code>
Or you can assign the axes to variables so you dont have to do everything in order,
import random
x = [random.random() for i in range(50)]
y = [random.random() for i in range(50)]
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
fig2 = plt.figure()
ax2 = fig2.add_subplot(111)
ax1.scatter( x, y )
ax1.set_xlabel('X title')
ax1.set_ylabel('Y title')
fig1.savefig('myfig1.pdf')
ax2.hist( y )
Note that when setting properties of an axis using its methods, most of the plt attributes become set_X. For example, instead of plt.ylabel('my_y') you do ax1.set_ylabel('my_y'). You can still use the plt methods, but they will apply to whatever the current plot is. The variables ax1 and ax2 give you a little more freedom about when you do things.
I have a very simple question. I need to have a second x-axis on my plot and I want that this axis has a certain number of tics that correspond to certain position of the first axis.
Let's try with an example. Here I am plotting the dark matter mass as a function of the expansion factor, defined as 1/(1+z), that ranges from 0 to 1.
semilogy(1/(1+z),mass_acc_massive,'-',label='DM')
xlim(0,1)
ylim(1e8,5e12)
I would like to have another x-axis, on the top of my plot, showing the corresponding z for some values of the expansion factor. Is that possible? If yes, how can I have xtics ax
I'm taking a cue from the comments in #Dhara's answer, it sounds like you want to set a list of new_tick_locations by a function from the old x-axis to the new x-axis. The tick_function below takes in a numpy array of points, maps them to a new value and formats them:
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = ax1.twiny()
X = np.linspace(0,1,1000)
Y = np.cos(X*20)
ax1.plot(X,Y)
ax1.set_xlabel(r"Original x-axis: $X$")
new_tick_locations = np.array([.2, .5, .9])
def tick_function(X):
V = 1/(1+X)
return ["%.3f" % z for z in V]
ax2.set_xlim(ax1.get_xlim())
ax2.set_xticks(new_tick_locations)
ax2.set_xticklabels(tick_function(new_tick_locations))
ax2.set_xlabel(r"Modified x-axis: $1/(1+X)$")
plt.show()
You can use twiny to create 2 x-axis scales. For Example:
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax2 = ax1.twiny()
a = np.cos(2*np.pi*np.linspace(0, 1, 60.))
ax1.plot(range(60), a)
ax2.plot(range(100), np.ones(100)) # Create a dummy plot
ax2.cla()
plt.show()
Ref: http://matplotlib.sourceforge.net/faq/howto_faq.html#multiple-y-axis-scales
Output:
From matplotlib 3.1 onwards you may use ax.secondary_xaxis
import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(1,13, num=301)
y = (np.sin(x)+1.01)*3000
# Define function and its inverse
f = lambda x: 1/(1+x)
g = lambda x: 1/x-1
fig, ax = plt.subplots()
ax.semilogy(x, y, label='DM')
ax2 = ax.secondary_xaxis("top", functions=(f,g))
ax2.set_xlabel("1/(x+1)")
ax.set_xlabel("x")
plt.show()
If You want your upper axis to be a function of the lower axis tick-values you can do as below. Please note: sometimes get_xticks() will have a ticks outside of the visible range, which you have to allow for when converting.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
ax1 = fig.add_subplot(111)
ax1.plot(range(5), range(5))
ax1.grid(True)
ax2 = ax1.twiny()
ax2.set_xticks( ax1.get_xticks() )
ax2.set_xbound(ax1.get_xbound())
ax2.set_xticklabels([x * 2 for x in ax1.get_xticks()])
title = ax1.set_title("Upper x-axis ticks are lower x-axis ticks doubled!")
title.set_y(1.1)
fig.subplots_adjust(top=0.85)
fig.savefig("1.png")
Gives:
Answering your question in Dhara's answer comments: "I would like on the second x-axis these tics: (7,8,99) corresponding to the x-axis position 10, 30, 40. Is that possible in some way?"
Yes, it is.
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
ax1 = fig.add_subplot(111)
a = np.cos(2*np.pi*np.linspace(0, 1, 60.))
ax1.plot(range(60), a)
ax1.set_xlim(0, 60)
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax2 = ax1.twiny()
ax2.set_xlabel("x-transformed")
ax2.set_xlim(0, 60)
ax2.set_xticks([10, 30, 40])
ax2.set_xticklabels(['7','8','99'])
plt.show()
You'll get:
I'm forced to post this as an answer instead of a comment due to low reputation.
I had a similar problem to Matteo. The difference being that I had no map from my first x-axis to my second x-axis, only the x-values themselves. So I wanted to set the data on my second x-axis directly, not the ticks, however, there is no axes.set_xdata. I was able to use Dhara's answer to do this with a modification:
ax2.lines = []
instead of using:
ax2.cla()
When in use also cleared my plot from ax1.
I use matplotib's Axes API to plot some figures. One of the lines I plot represents the theoretical expected line. It has no meaning outside of the original y and x limits. What I want, is for matlplotlib to ignore it when autoscaling the limits. What I used to do, is to check what are the current limits, then plot, and reset the limits. The problem is that when I plot a third plot, the limits get recalculated together with the theoretical line, and that really expands the graph.
# Boilerplate
from matplotlib.figure import Figure
from matplotlib.backends.backend_pdf import FigureCanvasPdf
from numpy import sin, linspace
fig = Figure()
ax = fig.add_subplot(1,1,1)
x1 = linspace(-1,1,100)
ax.plot(x1, sin(x1))
ax.plot(x1, 3*sin(x1))
# I wish matplotlib would not consider the second plot when rescaling
ax.plot(x1, sin(x1/2.0))
# But would consider the first and last
canvas_pdf = FigureCanvasPdf(fig)
canvas_pdf.print_figure("test.pdf")
The obvious way is to just manually set the limits to what you want. (e.g. ax.axis([xmin, xmax, ymin, ymax]))
If you don't want to bother with finding out the limits manually, you have a couple of options...
As several people (tillsten, Yann, and Vorticity) have mentioned, if you can plot the function you want to ignore last, then you can disable autoscaling before plotting it or pass the scaley=False kwarg to plot
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
x1 = np.linspace(-1,1,100)
ax.plot(x1, np.sin(x1))
ax.plot(x1, np.sin(x1 / 2.0))
ax.autoscale(False) #You could skip this line and use scalex=False on
ax.plot(x1, 3 * np.sin(x1)) #the "theoretical" plot. It has to be last either way
fig.savefig('test.pdf')
Note that you can adjust the zorder of the last plot so that it's drawn in the "middle", if you want control over that.
If you don't want to depend on the order, and you do want to just specify a list of lines to autoscale based on, then you could do something like this: (Note: This is a simplified version assuming you're dealing with Line2D objects, rather than matplotlib artists in general.)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms as mtransforms
def main():
fig, ax = plt.subplots()
x1 = np.linspace(-1,1,100)
line1, = ax.plot(x1, np.sin(x1))
line2, = ax.plot(x1, 3 * np.sin(x1))
line3, = ax.plot(x1, np.sin(x1 / 2.0))
autoscale_based_on(ax, [line1, line3])
plt.show()
def autoscale_based_on(ax, lines):
ax.dataLim = mtransforms.Bbox.unit()
for line in lines:
xy = np.vstack(line.get_data()).T
ax.dataLim.update_from_data_xy(xy, ignore=False)
ax.autoscale_view()
if __name__ == '__main__':
main()
Use the scalex/scaley kw arg:
plot(x1, 3*sin(x1), scaley=False)
LineCollection objects can be ignored by using the autolim=False argument:
from matplotlib.collections import LineCollection
fig, ax = plt.subplots()
x1 = np.linspace(-1,1,100)
# Will update limits
ax.plot(x1, np.sin(x1))
# Will not update limits
col = LineCollection([np.column_stack((x1, 3 * np.sin(x1)))], colors='g')
ax.add_collection(col, autolim=False)
# Will still update limits
ax.plot(x1, np.sin(x1 / 2.0))
This can be done regardless of plotting order by creating another axes to work on.
In this version, we create a twin axes and disable the autoscaling on that twin axes. In this way, the plot is scaled based on anything plotted in the original axes, but is not scaled by anything put into the twin axes.
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
x1 = np.linspace(-1,1,100)
twin_ax = ax.twinx() # Create a twin axes.
twin_ax.autoscale(False) # Turn off autoscaling on the twin axes.
twin_ax.set_yticks([]) # Remove the extra tick numbers from the twin axis.
ax.plot(x1, np.sin(x1))
twin_ax.plot(x1, 3 * np.sin(x1), c='green') # Plotting the thing we don't want to scale on in the twin axes.
ax.plot(x1, np.sin(x1 / 2.0))
twin_ax.set_ylim(ax.get_ylim()) # Make sure the y limits of the twin matches the autoscaled of the original.
fig.savefig('test.pdf')
Note, the above only prevents the un-twined axis from auto scaling (y in the above case). To get it to work for both x and y, we can do the twinning process for both x and y (or create the new axes from scratch):
import numpy as np
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
x1 = np.linspace(-1,1,100)
x2 = np.linspace(-2,2,100) # Would extend the x limits if auto scaled
twin_ax = ax.twinx().twiny() # Create a twin axes.
twin_ax.autoscale(False) # Turn off autoscaling on the twin axes.
twin_ax.set_yticks([]) # Remove the extra tick numbers from the twin axis.
twin_ax.set_xticks([]) # Remove the extra tick numbers from the twin axis.
ax.plot(x1, np.sin(x1))
twin_ax.plot(x2, 3 * np.sin(x2), c='green') # Plotting the thing we don't want to scale on in the twin axes.
ax.plot(x1, np.sin(x1 / 2.0))
twin_ax.set_ylim(ax.get_ylim()) # Make sure the y limits of the twin matches the autoscaled of the original.
twin_ax.set_xlim(ax.get_xlim()) # Make sure the x limits of the twin matches the autoscaled of the original.
fig.savefig('test.png')
As a generalisation of jam's answer, a collection object can be obtained from any of matplotlib's plotting functions and then re-added with autolim=False. For example,
fig, ax = plt.subplots()
x1 = np.linspace(-1,1,100)
# Get hold of collection
collection = ax.plot(x1, np.sin(x1))
# Remove collection from the plot
collection.remove()
# Rescale
ax.relim()
# Add the collection without autoscaling
ax.add_collection(collection, autolim=False)